Skip to content

Instantly share code, notes, and snippets.

@GaryLee
Last active February 13, 2026 06:50
Show Gist options
  • Select an option

  • Save GaryLee/a370c8da1a75a63a9a4dd3f22c6e4bff to your computer and use it in GitHub Desktop.

Select an option

Save GaryLee/a370c8da1a75a63a9a4dd3f22c6e4bff to your computer and use it in GitHub Desktop.
A codegen tool which can put the generating code in comment.
#!python
# coding: utf-8
"""
Utility functions for comment-based code generation.
Single line code generation:
- Lines starting with the specified print symbol (default: '//? ') within the block are treated as f-string of Python.
- The leading spaces of an f-string line set the indentation of the Python code, not of the generated code.
- The generated code is inserted into the output at the position of the code generation block.
For example (assume there is a Python variable default = 5):
//? int var_a = {default};
int var_a = 5;
//$
Multiple line usage:
- Any comment line starting with the specified code symbol (default: '//% ') is considered part of a code generation block.
- The code generation block ends when a comment line with the specified block end symbol (default: '//$') is encountered.
- Lines within the block are executed as Python code.
- The whole file shares the same local and global context for code generation.
- A .py file with the same name as the target file can be included for additional context.
- For example, for 'example.c', if 'example.c.py' exists, it will be executed first to provide context.
For example:
//% num_of_variables = 5
//% space = lambda n: ' ' * n
//% for i in range(num_of_variables):
//? {space(8)}int var_{i} = {i**2};
int var_0 = 0;
int var_1 = 1;
int var_2 = 4;
int var_3 = 9;
int var_4 = 16;
//$
In-line code generation:
- In-line code generation allows embedding code generation statements directly within a line of code using a specific syntax.
- The syntax for in-line code generation is: /*?<f-string>*/ <replacement> /*$*/.
- The <f-string> is a Python formatted string that can include expressions to be evaluated.
- The <replacement> is the part of the line that will be replaced by the evaluated result of the f-string.
- The /*$*/ marks the end of the in-line code generation statement.
For example(assume there is a python variable size = 16):
int data1[/*? {size} */ 16 /*$*/];
int data2[/*? {size * 2} */ 32 /*$*/];
"""
import sys
import os
import io
import re
import importlib
from pathlib import Path
from dataclasses import dataclass
from collections.abc import Mapping
# One unit of indentation; every helper below is a multiple of this string.
INDENT_TEXT = ' '


def INDENT(n):
    """Return INDENT_TEXT repeated n times."""
    return INDENT_TEXT * n


# Underscore-only shorthands: a name made of 2*k underscores holds k
# indentation units, so generating code can write e.g. f"{____}int x;".
(
    __,
    ____,
    ______,
    ________,
    __________,
    ____________,
    ______________,
    ________________,
) = (INDENT(level) for level in range(1, 9))
# Alignment codes accepted by AlignSpec.align.
LEFT, RIGHT, CENTER = 0, 1, 2


@dataclass
class AlignSpec:
    """
    Specification for a column in the grid.

    :param align: Alignment of the column. 0: left, 1: right, 2: center.
    :param width: Width of the column.
    :param padding: Fill character for padding.
    """

    align: int = LEFT  # one of LEFT, RIGHT, CENTER
    width: int = 0
    padding: str = ' '

    def apply(self, data):
        """
        Render one cell.

        :param data: The cell value; it is converted with str() first.
        :return: The text justified to ``width`` with ``padding`` according to ``align``.
        :raises KeyError: If ``align`` is not LEFT, RIGHT or CENTER.
        """
        text = str(data)
        # Select the bound justification method for the configured alignment.
        justify = {
            LEFT: text.ljust,
            RIGHT: text.rjust,
            CENTER: text.center,
        }[self.align]
        return justify(self.width, self.padding)
class CodeGenInfo:
    """Namespace for settings shared across the code generator."""

    # Line terminator of the file being processed. codegen_proc overwrites it
    # with the terminator of the file's first line; Grid reads it as its
    # default row separator.
    FileNewLine = "\n"
class Grid:
    """
    A simple grid for formatting tabular data.

    Grid is a singleton: every ``Grid(...)`` call returns the same object, and
    the constructor arguments are only honoured by the first call.

    :param align_spec: A dict specifying the alignment for each column. The key is the column index (0-based), and the value is a AlignSpec object.
    :param prefix: A string to be added at the beginning of each row.
    :param suffix: A string to be added at the end of each row.
    :param newline: The string used to join rows in content(). If None, CodeGenInfo.FileNewLine is used.
    """

    _instance = None

    def __new__(cls, *args, **kwargs):
        # Python hands the constructor arguments to __new__ as well as to
        # __init__, so they must be accepted here (and ignored); otherwise
        # Grid(prefix=...) raises TypeError.
        if cls._instance is None:
            cls._instance = super(Grid, cls).__new__(cls)
            # Initialization flag that keeps __init__ from running more than once.
            cls._initialized = False
        return cls._instance

    def __init__(self, align_spec=None, prefix="", suffix="", newline=None):
        if self._initialized:
            return
        if align_spec is None:
            self.align_spec = {}
        else:
            assert isinstance(align_spec, dict), "align_spec must be a dict."
            self.align_spec = align_spec
        self.prefix = prefix
        self.suffix = suffix
        self._allow_append_row = True  # whether the next << appends its row
        self._curr_collect = None  # key of the collection the next << targets
        self._row_collections = {}  # collection key -> list of rows (lists of str)
        self._newline = CodeGenInfo.FileNewLine if newline is None else newline
        self._initialized = True

    @property
    def rows(self):
        """Get all rows from all collections."""
        for collected in self._row_collections.values():
            yield from collected

    def rows_in_collection(self, *collection_keys):
        """
        Get rows from specified collections.

        :param collection_keys: The keys of the collections. If empty, get rows from all collections.
        """
        if not collection_keys:
            collection_keys = self._row_collections.keys()
        for key in collection_keys:
            yield from self._row_collections.get(key, [])

    def collection(self, key=None):
        """
        Get a collection of rows.

        :param key: The key of the collection.
        :return: The list of rows stored under ``key``, or an empty list if there is none.
        """
        return self._row_collections.get(key, [])

    def __call__(self, key, allow=True):
        """Switch to a collection of rows. Then use << operator to add rows to the collection.

        After the next << the current collection is reset to None.

        :param key: The key of the collection.
        :param allow: A bool, or a callable returning one. If false, the next << operator is ignored; if true, the next << operator adds a row to the collection.
        :return: The Grid instance, so that ``grid(key) << row`` can be written.
        """
        self._curr_collect = key
        self._allow_append_row = allow() if callable(allow) else bool(allow)
        return self

    def __lshift__(self, row):
        """
        Append a row to the current collection.

        :param row: The row to be added, which should be a list or tuple. Every cell is stored as str.
        :return: The Grid instance.
        """
        if self._allow_append_row:
            assert isinstance(row, (tuple, list)), "Only list can be added as a row."
            self._row_collections.setdefault(self._curr_collect, []).append(list(map(str, row)))
        # The selection made by __call__ applies to a single << only.
        self._curr_collect = None
        self._allow_append_row = True
        return self

    def prepare(self, *collection_keys):
        """
        Calculating the maximum width for each column.

        Sets ``max_column_num`` and widens (or creates) the ``align_spec`` entry
        of every column found in the selected collections.

        :param collection_keys: The keys of the collections to measure. If empty, measure all collections.
        """
        self.max_column_num = 0
        for columns in self.rows_in_collection(*collection_keys):
            self.max_column_num = max(self.max_column_num, len(columns))
            for i, col in enumerate(columns):
                cell_width = len(str(col))
                if i not in self.align_spec:
                    self.align_spec[i] = AlignSpec(align=LEFT, width=cell_width)
                else:
                    current = self.align_spec[i]
                    # Carry the padding over as well, so a configured fill
                    # character is not silently reset to the default.
                    self.align_spec[i] = AlignSpec(
                        align=current.align,
                        width=max(current.width, cell_width),
                        padding=current.padding,
                    )

    def __iter__(self):
        """Arrange columns and iterate over the formatted rows."""
        return self.arrange()

    def arrange(self, *collection_keys):
        """Arrange columns and iterate over the formatted rows.

        :param collection_keys: The keys of the collections to be included. If empty, include all collections.
        :return: A generator of strings, one per row, each wrapped in prefix and suffix.
        """
        self.prepare(*collection_keys)
        for columns in self.rows_in_collection(*collection_keys):
            items = [self.align_spec[i].apply(col) for i, col in enumerate(columns)]
            # Pad short rows with empty cells so that every row has the same width.
            for i in range(len(columns), self.max_column_num):
                items.append(self.align_spec[i].apply(""))
            yield self.prefix + "".join(items) + self.suffix

    def content(self, *collection_keys, prefix=None, suffix=None, remain=False):
        """Get the arranged content as a single string.

        A prefix or suffix given here replaces the stored one and therefore
        also applies to later calls.

        :param collection_keys: The keys of the collections to be included. If empty, include all collections.
        :param prefix: A string to be added at the beginning of each row. If None, use the default prefix.
        :param suffix: A string to be added at the end of each row. If None, use the default suffix.
        :param remain: If True, keep the rows in the collections after generating the content. If False, clear the rows after generating the content.
        :return: The arranged content as a single string.
        """
        self.prefix = prefix if prefix is not None else self.prefix
        self.suffix = suffix if suffix is not None else self.suffix
        text = self._newline.join(self.arrange(*collection_keys))
        if not remain:
            if not collection_keys:
                self._row_collections.clear()
            else:
                for k in collection_keys:
                    self._row_collections.pop(k, None)
        # Forget the column specs so the next table is measured from scratch.
        # NOTE(review): done regardless of `remain`, since the docstring ties
        # `remain` to the rows only - confirm this is the intended scope.
        self.align_spec.clear()
        return text
def import_module(name, location, import_all=False):
    """Import a module given its name and file location. This is useful when the file contains special characters.

    :param name: The name under which the module is registered in sys.modules.
    :param location: The path of the Python source file to load.
    :param import_all: If True, also copy the module's public names into this
        module's globals, like ``from <module> import *``: the names listed in
        ``__all__`` if it is defined, otherwise every name that does not start
        with an underscore.
    :return: The loaded module object.
    """
    # `import importlib` alone does not guarantee that the `util` submodule is
    # available as an attribute, so import it explicitly.
    import importlib.util

    spec = importlib.util.spec_from_file_location(name=name, location=location)
    assert spec is not None and spec.loader is not None, f"Cannot find module file: {location}."
    module = importlib.util.module_from_spec(spec)
    sys.modules[name] = module  # Adds the module to sys.modules
    spec.loader.exec_module(module)
    if import_all:
        # Copying module.__dict__ wholesale would overwrite this module's own
        # __name__, __doc__, __spec__, ...; export public names only.
        exported = getattr(module, "__all__", None)
        if exported is None:
            exported = [key for key in vars(module) if not key.startswith("_")]
        globals().update({key: getattr(module, key) for key in exported})
    return module
def extract_newline(s):
    """Split the trailing newline characters off a string.

    :param s: The string to split, typically one line of a file.
    :return: A tuple ``(text, newline)`` where ``newline`` is the trailing run
        of ``\\r`` / ``\\n`` characters (possibly empty) and ``text`` is
        everything before it.
    """
    # rstrip-based split; a negative slice such as s[:-len(newline)] would
    # return "" for a string that has no trailing newline at all.
    text = s.rstrip("\r\n")
    return text, s[len(text):]
def code_print_to_code(lstrip_ln, code_line_print, codegen_func='_codegen_'):
    """
    Convert a code print line to a code generation function call.

    The text after the print prefix is split into its leading whitespace
    (kept as the Python indentation of the generated statement), the format
    string, and the line terminator. The format string is emitted as a raw
    triple-quoted f-string passed to ``codegen_func``.

    Known limitation: a format string that conflicts with both triple-quote
    styles, or that ends with a backslash, still yields invalid Python.

    :param lstrip_ln: The line with leading spaces stripped.
    :param code_line_print: The code line print prefix.
    :param codegen_func: The code generation function name.
    :return: The generated code line.
    :rtype: str
    """
    payload = lstrip_ln[len(code_line_print):]
    # Split the terminator off first, so that a line without one (the last
    # line of a file) or with a whitespace-only payload is handled correctly.
    text = payload.rstrip("\r\n")
    newline = payload[len(text):]
    format_string = text.lstrip()
    indent = text[:len(text) - len(format_string)]
    # Prefer """ (the historical output). Switch to ''' when the text contains
    # """ or ends with a double quote, e.g. `#include "{name}.h"`, which would
    # otherwise terminate the literal early.
    quote = '"""'
    for candidate in ('"""', "'''"):
        if candidate not in format_string and not format_string.endswith(candidate[0]):
            quote = candidate
            break
    return f"{indent}{codegen_func}(rf{quote}{format_string}{quote}){newline}"
def codegen_inline_proc(ln, local_vars, global_vars):
    """
    Expand every ``/*?<f-string>*/ <replacement> /*$*/`` statement in a line.

    Each f-string is evaluated with ``global_vars`` / ``local_vars`` and its
    result is substituted for the text between the opening comment and the
    closing ``/*$*/`` marker; both comments are kept.

    :param ln: The line to be processed.
    :param local_vars: The local variables for evaluation.
    :param global_vars: The global variables for evaluation.
    :return: The processed line, and a boolean indicating if any replacement was made.
    """
    inline_re = re.compile(r"""/\*\?(?P<fstr>.*?)\*/(?P<repl>.*?)(?P<end>/\*\s*\$\s*\*/)""")
    # Scratch variable that receives each evaluated f-string inside local_vars.
    scratch = f"__expanded_fstr_{id(ln)}__"
    pieces = []
    cursor = 0
    for match in inline_re.finditer(ln):
        template = match.group('fstr')
        exec(f'{scratch} = f"""{template}"""', global_vars, local_vars)
        # Fetch the result and remove the scratch variable in one step.
        rendered = local_vars.pop(scratch)
        repl_start, repl_end = match.span('repl')
        stop = match.end('end')
        pieces += [ln[cursor:repl_start], rendered, ln[repl_end:stop]]
        cursor = stop
    if not pieces:
        # No inline statement on this line.
        return ln, False
    pieces.append(ln[cursor:])
    return ''.join(pieces), True
def codegen_proc(filepath, inplace=True, comment=r"//", code_symbol="% ", code_block="$", code_print="? ", include_py=True, global_py=None):
    """
    Extract code generation blocks from comment and execute the code to generate code.

    Side effects: the directory of ``filepath`` is appended to sys.path,
    CodeGenInfo.FileNewLine is set from the first line of the file, and the
    embedded Python code runs with this module's globals as its globals.

    :param filepath: The file to be processed.
    :param inplace: If True, replace the original file with the generated file (the original is kept with a .code-gen.bak suffix). If False, create a new file with .code-gen suffix.
    :param comment: The comment string used in the file.
    :param code_symbol: The symbol indicating the start of a code generation line.
    :param code_block: The symbol indicating the end of a code generation block.
    :param code_print: The symbol indicating a print statement in the code generation block.
    :param include_py: If True, include a .py file with the same name as the target file for additional context
        ('example.c.py' for 'example.c'; 'example.py' is still accepted as a fallback).
    :param global_py: Optional path of a Python file whose public names are imported into the execution context.
    :return: True if any code generation blocks were found and processed, False otherwise.
    :raises FileNotFoundError: If ``filepath`` does not exist.
    :raises ValueError: If a code generation block is still open at the end of the file.
    """
    if isinstance(filepath, str):
        filepath = Path(filepath)
    if not filepath.exists():
        raise FileNotFoundError(f"File not found: {filepath}")

    OUT_CODE_BLOCK, IN_CODE_BLOCK = 0, 1
    CODE_LINE_PREFIX = rf"{comment}{code_symbol}"
    CODE_LINE_BLOCK_END = rf"{comment}{code_block}"
    CODE_LINE_PRINT = rf"{comment}{code_print}"

    local_vars = dict()
    global_vars = globals()
    cwd = str(filepath.parent)
    if cwd not in sys.path:
        sys.path.append(cwd)

    out = io.StringIO()
    # The generating code "prints" into the output buffer through _codegen_.
    local_vars["_codegen_"] = lambda *args, **kwargs: print(
        *args, **kwargs, file=out
    )
    local_vars["_filepath_"] = str(filepath)

    if global_py:
        global_py_path = Path(global_py)
        if global_py_path.exists():
            basename = global_py_path.stem.replace('.', '_')
            import_module(name=basename, location=str(global_py_path), import_all=True)

    if include_py:
        # Documented companion name first ('example.c.py'), then the name the
        # previous implementation looked for ('example.py').
        candidates = (
            filepath.with_name(filepath.name + '.py'),
            filepath.with_suffix('.py'),
        )
        for pyfile in candidates:
            # Never run the target itself as its own companion (a *.py target).
            if pyfile != filepath and pyfile.exists():
                local_vars["_pyfile_"] = str(pyfile)
                exec(pyfile.read_text(), global_vars, local_vars)
                break

    codegen_block_count = 0
    next_state = OUT_CODE_BLOCK
    block_start = 0  # line number on which the currently open block started
    for ln_idx, ln in enumerate(filepath.read_text().splitlines(keepends=True), start=1):
        lstrip_ln = ln.lstrip()
        state = next_state
        # Record the newline style from the first line.
        if ln_idx == 1:
            CodeGenInfo.FileNewLine = extract_newline(ln)[1]

        if state == OUT_CODE_BLOCK:
            opens_with_code = lstrip_ln.startswith(CODE_LINE_PREFIX)
            if opens_with_code or lstrip_ln.startswith(CODE_LINE_PRINT):
                # First line of a new block: start collecting its Python code.
                next_state = IN_CODE_BLOCK
                block_start = ln_idx
                codegen_block_count += 1
                content = io.StringIO()
                if opens_with_code:
                    content.write(lstrip_ln[len(CODE_LINE_PREFIX):])
                else:
                    content.write(code_print_to_code(lstrip_ln, CODE_LINE_PRINT, codegen_func='_codegen_'))
            # Lines outside a block are copied, with inline statements expanded.
            ln, updated = codegen_inline_proc(ln, local_vars, global_vars)
            codegen_block_count += 1 if updated else 0
            out.write(ln)
        elif state == IN_CODE_BLOCK:
            if lstrip_ln.startswith(CODE_LINE_BLOCK_END):
                # Run the collected code; its output lands in `out` right
                # before the block end marker.
                next_state = OUT_CODE_BLOCK
                exec(content.getvalue(), global_vars, local_vars)
                out.write(ln)
            elif lstrip_ln.startswith(CODE_LINE_PRINT):
                content.write(code_print_to_code(lstrip_ln, CODE_LINE_PRINT, codegen_func='_codegen_'))
                out.write(ln)
            elif lstrip_ln.startswith(CODE_LINE_PREFIX):
                content.write(lstrip_ln[len(CODE_LINE_PREFIX):])
                out.write(ln)
            # Any other line inside a block is previously generated output and
            # is dropped; it is regenerated when the block ends.
        else:
            assert False, "Invalid state"

    if next_state == IN_CODE_BLOCK:
        # Writing the result now would silently drop every non-generator line
        # after the block start, so refuse to continue.
        raise ValueError(
            f"{filepath}: the code generation block starting at line {block_start} "
            f"is not closed with '{CODE_LINE_BLOCK_END}'."
        )
    if codegen_block_count == 0:
        return False  # No code generation blocks found.

    generated = out.getvalue()
    if inplace:
        # Path.replace overwrites an existing backup on every platform.
        filepath.replace(filepath.with_suffix(filepath.suffix + ".code-gen.bak"))
        filepath.write_text(generated)
    else:
        codegen_file = filepath.with_suffix(filepath.suffix + ".code-gen")
        codegen_file.write_text(generated)
    return True
def argv_wrapper(argv):
    """Parse a sys.argv-style list into keyword arguments for code generation.

    :param argv: The full argument vector; ``argv[0]`` (the program name) is skipped.
    :return: A dict with the key 'files' (list of paths) and one key per option.
    """
    from argparse import ArgumentParser

    cli = ArgumentParser(description="Comment-based code generation utility.")
    cli.add_argument('files', metavar='FILE', type=str, nargs='+', help='Files to process for code generation.')

    # (option, argparse settings, help text), in the order shown by --help.
    switch = {'action': 'store_true'}
    option_table = (
        ('--inplace', switch, 'Replace the original file with the generated file.'),
        ('--comment', {'type': str, 'default': '//'}, 'Comment string used in the file.'),
        ('--code-symbol', {'type': str, 'default': '% '}, 'Symbol indicating the start of a code generation line.'),
        ('--code-block', {'type': str, 'default': '$'}, 'Symbol indicating the end of a code generation block.'),
        ('--code-print', {'type': str, 'default': '? '}, 'Symbol indicating a print statement in the code generation block.'),
        ('--include-py', switch, 'Include a .py file with the same name as the target file for additional context.'),
        ('--global-py', {'type': str, 'default': None}, 'Optional global variables for the execution context.'),
    )
    for flag, settings, help_text in option_table:
        cli.add_argument(flag, help=help_text, **settings)

    parsed = cli.parse_args(argv[1:])
    return vars(parsed)
if __name__ == '__main__':
    if len(sys.argv) > 1:
        options = argv_wrapper(sys.argv)
        targets = options.pop('files', [])
        for target in map(Path, targets):
            # Process each file from inside its own directory, then return to
            # where we started before moving on to the next file.
            origin = os.getcwd()
            os.chdir(target.parent)
            codegen_proc(target.name, **options)
            os.chdir(origin)
    else:
        # Without arguments, show the module documentation as usage help.
        print(__doc__)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment