Last active
February 13, 2026 06:50
-
-
Save GaryLee/a370c8da1a75a63a9a4dd3f22c6e4bff to your computer and use it in GitHub Desktop.
A codegen tool which can put the generating code in comment.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!python | |
| # coding: utf-8 | |
| """ | |
| Utility functions for comment-based code generation. | |
| Single line code generation: | |
| - Lines starting with the specified print symbol (default: '//? ') within the block are treated as f-string of Python. | |
| - The leading spaces of f-string lines are for indentation in the Python code, not in the generated code. | |
| - The generated code is inserted into the output at the position of the code generation block. | |
| For example(assume there is a python variable default = 5): | |
| //? int var_a = {default}; | |
| int var_a = 5; | |
| //$ | |
| Multiple line usage: | |
| - Any comment line starting with the specified code symbol (default: '//% ') is considered part of a code generation block. | |
| - The code generation block ends when a comment line with the specified block end symbol (default: '//$') is encountered. | |
| - Lines within the block are executed as Python code. | |
| - The whole file shares the same local and global context for code generation. | |
| - A .py file with the same name as the target file can be included for additional context. | |
| - For example, for 'example.c', if 'example.c.py' exists, it will be executed first to provide context. | |
| For example: | |
| //% num_of_variables = 5 | |
| //% space = lambda n: ' ' * n | |
| //% for i in range(num_of_variables): | |
| //? {space(8)}int var_{i} = {i**2}; | |
| int var_0 = 0; | |
| int var_1 = 1; | |
| int var_2 = 4; | |
| int var_3 = 9; | |
| int var_4 = 16; | |
| //$ | |
| In-line code generation: | |
| - In-line code generation allows embedding code generation statements directly within a line of code using a specific syntax. | |
| - The syntax for in-line code generation is: /*?<f-string>*/ <replacement> /*$*/. | |
| - The <f-string> is a Python formatted string that can include expressions to be evaluated. | |
| - The <replacement> is the part of the line that will be replaced by the evaluated result of the f-string. | |
| - The /*$*/ marks the end of the in-line code generation statement. | |
| For example(assume there is a python variable size = 16): | |
| int data1[/*? {size} */ 16 /*$*/]; | |
| int data2[/*? {size * 2} */ 32 /*$*/]; | |
| """ | |
| import sys | |
| import os | |
| import io | |
| import re | |
| import importlib | |
| from pathlib import Path | |
| from dataclasses import dataclass | |
| from collections.abc import Mapping | |
# One unit of indentation for generated code.
# NOTE(review): appears as a single space in this copy of the file; confirm
# against the upstream source in case whitespace was collapsed.
INDENT_TEXT = ' '


def INDENT(n):
    """Return ``INDENT_TEXT`` repeated ``n`` times."""
    return INDENT_TEXT * n


# Underscore-only aliases usable inside f-string templates: the name made of
# 2*k underscores holds k units of indentation (k = 1..8).
(
    __,
    ____,
    ______,
    ________,
    __________,
    ____________,
    ______________,
    ________________,
) = (INDENT(level) for level in range(1, 9))
# Column alignment modes understood by AlignSpec.
LEFT = 0
RIGHT = 1
CENTER = 2


@dataclass
class AlignSpec:
    """
    Specification for a column in the grid.

    :param align: Alignment of the column. 0: left, 1: right, 2: center.
    :param width: Width of the column.
    :param padding: Fill character for padding.
    """
    align: int = LEFT  # 0: left, 1: right, 2: center
    width: int = 0
    padding: str = ' '

    def apply(self, data):
        """
        Return ``str(data)`` justified to ``width`` with ``padding`` as fill.

        :param data: The cell value; it is converted with ``str()``.
        :raises KeyError: If ``align`` is not LEFT, RIGHT or CENTER.
        """
        align_func = {
            LEFT: str.ljust,
            RIGHT: str.rjust,
            CENTER: str.center,
        }
        return align_func[self.align](str(data), self.width, self.padding)


class CodeGenInfo:
    """Shared state of the current code generation run."""

    # Default row separator used by Grid when no explicit newline is given.
    FileNewLine = '\n'


class Grid:
    """
    A simple grid for formatting tabular data.

    The class is a singleton: every ``Grid(...)`` call returns the same
    instance, and only the first call's arguments are used to initialise it.

    :param align_spec: A dict specifying the alignment for each column. The key is the column index (0-based), and the value is an AlignSpec object.
    :param prefix: A string to be added at the beginning of each row.
    :param suffix: A string to be added at the end of each row.
    :param newline: Row separator used by content(). Defaults to CodeGenInfo.FileNewLine.
    """
    _instance = None
    _initialized = False

    def __new__(cls, *args, **kwargs):
        # The arguments are accepted (and ignored) here so that they can reach
        # __init__; a bare ``__new__(cls)`` rejects any constructor argument.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            # Initialization flag preventing __init__ from running multiple times.
            cls._initialized = False
        return cls._instance

    def __init__(self, align_spec=None, prefix="", suffix="", newline=None):
        if self._initialized:
            return
        if align_spec is None:
            self.align_spec = {}
        else:
            if not isinstance(align_spec, dict):
                raise TypeError("align_spec must be a dict.")
            self.align_spec = align_spec
        self.prefix = prefix
        self.suffix = suffix
        self._allow_append_row = True
        self._curr_collect = None
        self._row_collections = {}
        self._newline = newline if newline is not None else CodeGenInfo.FileNewLine
        self._initialized = True

    @property
    def rows(self):
        """Get all rows from all collections."""
        for rows in self._row_collections.values():
            yield from rows

    def rows_in_collection(self, *collection_keys):
        """
        Get rows from specified collections.

        :param collection_keys: The keys of the collections. If empty, get rows from all collections.
        """
        if not collection_keys:
            collection_keys = self._row_collections.keys()
        for key in collection_keys:
            yield from self._row_collections.get(key, [])

    def collection(self, key=None):
        """
        Get a collection of rows.

        :param key: The key of the collection.
        :return: The list of rows, or an empty list if the key is unknown.
        """
        return self._row_collections.get(key, [])

    def __call__(self, key, allow=True):
        """Switch to a collection of rows. Then use << operator to add rows to the collection.

        After the next << operator, the current collection is reset to None.

        :param key: The key of the collection.
        :param allow: A value or a zero-argument callable. If it evaluates to False, the next << operator is ignored; otherwise the next << operator adds a row to the collection.
        :return: The Grid instance, so that ``grid(key) << row`` can be chained.
        """
        self._curr_collect = key
        self._allow_append_row = allow() if callable(allow) else bool(allow)
        return self

    def __lshift__(self, row):
        """
        Append a row to the current collection.

        :param row: The row to be added, which should be a list or tuple. Every cell is stored as ``str(cell)``.
        :return: The Grid instance.
        :raises TypeError: If the row is not a list or tuple.
        """
        if self._allow_append_row:
            if not isinstance(row, (tuple, list)):
                raise TypeError("Only list can be added as a row.")
            self._row_collections.setdefault(self._curr_collect, []).append(list(map(str, row)))
        # Selection and permission apply to a single << only.
        self._curr_collect = None
        self._allow_append_row = True
        return self

    def prepare(self, *collection_keys):
        """
        Calculate the maximum width for each column.

        Widths already present in ``align_spec`` act as minimum widths; the
        alignment and padding of existing entries are preserved.
        """
        self.max_column_num = 0
        for columns in self.rows_in_collection(*collection_keys):
            self.max_column_num = max(self.max_column_num, len(columns))
            for i, col in enumerate(columns):
                cell_width = len(str(col))
                known = self.align_spec.get(i)
                if known is None:
                    self.align_spec[i] = AlignSpec(align=LEFT, width=cell_width)
                else:
                    self.align_spec[i] = AlignSpec(
                        align=known.align,
                        width=max(known.width, cell_width),
                        padding=known.padding,  # keep the custom fill character
                    )

    def __iter__(self):
        """Arrange columns and iterate over the formatted rows."""
        return self.arrange()

    def arrange(self, *collection_keys):
        """Arrange columns and iterate over the formatted rows."""
        self.prepare(*collection_keys)
        for columns in self.rows_in_collection(*collection_keys):
            items = [self.align_spec[i].apply(col) for i, col in enumerate(columns)]
            # Short rows are filled with empty cells so every row has equal width.
            for i in range(len(columns), self.max_column_num):
                items.append(self.align_spec[i].apply(""))
            yield self.prefix + "".join(items) + self.suffix

    def content(self, *collection_keys, prefix=None, suffix=None, remain=False):
        """Get the arranged content as a single string.

        :param collection_keys: The keys of the collections to be included. If empty, include all collections.
        :param prefix: A string to be added at the beginning of each row. If None, use the default prefix. A given value becomes the new default.
        :param suffix: A string to be added at the end of each row. If None, use the default suffix. A given value becomes the new default.
        :param remain: If True, keep the rows in the collections after generating the content. If False, clear the rows after generating the content.
        :return: The arranged content as a single string.
        """
        self.prefix = prefix if prefix is not None else self.prefix
        self.suffix = suffix if suffix is not None else self.suffix
        text = self._newline.join(self.arrange(*collection_keys))
        if not remain:
            if not collection_keys:
                self._row_collections.clear()
            else:
                for k in collection_keys:
                    self._row_collections.pop(k, None)
            # Consumed rows must not influence the column widths of later output.
            self.align_spec.clear()
        return text
def import_module(name, location, import_all=False):
    """
    Import a module given its name and file location.

    This is useful when the file name contains special characters and
    therefore cannot be imported with a regular ``import`` statement.

    :param name: The name under which the module is registered in ``sys.modules``.
    :param location: The path of the Python source file.
    :param import_all: If True, copy the module's attributes into this module's globals (similar to ``from <module> import *``). Dunder attributes such as ``__name__`` or ``__doc__`` are skipped so that this module's own metadata is not overwritten.
    :return: The imported module object.
    :raises ImportError: If no loader can be determined for ``location``.
    """
    # ``import importlib`` alone does not guarantee that the ``util``
    # submodule is loaded, so it is imported explicitly here.
    import importlib.util

    spec = importlib.util.spec_from_file_location(name=name, location=location)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot find module file: {location}.")
    module = importlib.util.module_from_spec(spec)
    sys.modules[name] = module  # Adds the module to sys.modules
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module registered.
        sys.modules.pop(name, None)
        raise
    if import_all:
        exported = {
            attr: value
            for attr, value in vars(module).items()
            if not (attr.startswith('__') and attr.endswith('__'))
        }
        globals().update(exported)
    return module
def extract_newline(s):
    """
    Split trailing newline characters off a string.

    :param s: The string to be split.
    :return: A tuple ``(text, newline)`` where ``newline`` is the trailing run of ``\\r`` / ``\\n`` characters (possibly empty) and ``text`` is everything before it.
    """
    # Slicing by the stripped length also works when there is no trailing
    # newline; ``s[:-0]`` would be the empty string in that case.
    text = s.rstrip("\r\n")
    return text, s[len(text):]
def code_print_to_code(lstrip_ln, code_line_print, codegen_func='_codegen_'):
    """
    Convert a code print line to a code generation function call.

    The text after the print prefix is split into its leading blanks (kept as
    the Python indentation of the generated statement), the template (wrapped
    in a raw triple-quoted f-string) and the trailing newline characters.

    :param lstrip_ln: The line with leading spaces stripped.
    :param code_line_print: The code line print prefix.
    :param codegen_func: The code generation function name.
    :return: The generated code line.
    :rtype: str
    """
    after_prefix = lstrip_ln[len(code_line_print):]
    # Strip blanks only: a bare ``lstrip()`` would also swallow the newline of
    # an empty template line and glue it to the next generated statement.
    template_line = after_prefix.lstrip(" \t")
    indent = after_prefix[: len(after_prefix) - len(template_line)]
    format_string = template_line.rstrip("\r\n")
    newline = template_line[len(format_string):]

    # A template that contains the delimiter or ends with its quote character
    # (e.g. ``#include "foo.h"``) would terminate the literal early, so switch
    # to the other triple quote when that one is safe.
    quote = '"""'
    if quote in format_string or format_string.endswith('"'):
        alternative = "'''"
        if alternative not in format_string and not format_string.endswith("'"):
            quote = alternative
    return f"{indent}{codegen_func}(rf{quote}{format_string}{quote}){newline}"
def codegen_inline_proc(ln, local_vars, global_vars):
    """
    Expand the in-line code generation statements found in one line.

    Each ``/*?<f-string>*/ <replacement> /*$*/`` occurrence keeps its two
    comment markers; only the text between them is replaced by the evaluated
    f-string.

    :param ln: The line to be processed.
    :param local_vars: The local variables for evaluation.
    :param global_vars: The global variables for evaluation.
    :return: The processed line, and a boolean indicating if any in-line statement was found.
    """
    marker = re.compile(r"""/\*\?(?P<fstr>.*?)\*/(?P<repl>.*?)(?P<end>/\*\s*\$\s*\*/)""")
    pieces = []
    cursor = 0
    for found in marker.finditer(ln):
        template = found.group('fstr')
        # The comment text is evaluated as the body of a triple-quoted f-string.
        expanded = eval('f"""' + template + '"""', global_vars, local_vars)
        repl_start, repl_stop = found.span('repl')
        stop = found.end('end')
        pieces += [ln[cursor:repl_start], expanded, ln[repl_stop:stop]]
        cursor = stop
    if not pieces:
        return ln, False
    pieces.append(ln[cursor:])
    return ''.join(pieces), True
def codegen_proc(filepath, inplace=True, comment=r"//", code_symbol="% ", code_block="$", code_print="? ", include_py=True, global_py=None):
    """
    Extract code generation blocks from comment and execute the code to generate code.

    The file is scanned line by line with a two-state machine. Outside a
    block, lines are copied (after in-line expansion). A line starting with
    the code or print marker opens a block; inside a block only marker lines
    are kept, previously generated lines are dropped, and the collected Python
    code is executed when the block end marker is reached, which emits the
    freshly generated lines just before the end marker.

    NOTE: the companion .py file, the ``global_py`` module and every code
    block of the target file are executed with ``exec``; only process files
    from a trusted source.

    :param filepath: The file to be processed.
    :param inplace: If True, replace the original file with the generated file (the original is kept with a .code-gen.bak suffix). If False, create a new file with .code-gen suffix.
    :param comment: The comment string used in the file.
    :param code_symbol: The symbol indicating the start of a code generation line.
    :param code_block: The symbol indicating the end of a code generation block.
    :param code_print: The symbol indicating a print statement in the code generation block.
    :param include_py: If True, include a .py file with the same name as the target file for additional context ('example.c.py' for 'example.c'; 'example.py' is accepted as a fallback).
    :param global_py: Optional path of a Python file whose names are imported into the global execution context.
    :return: True if any code generation blocks were found and processed, False otherwise.
    :raises FileNotFoundError: If ``filepath`` does not exist.
    :raises ValueError: If a code generation block is not closed before the end of the file.
    """
    filepath = Path(filepath)
    if not filepath.exists():
        raise FileNotFoundError(f"File not found: {filepath}")

    OUT_CODE_BLOCK, IN_CODE_BLOCK = 0, 1
    CODE_LINE_PREFIX = f"{comment}{code_symbol}"
    CODE_LINE_BLOCK_END = f"{comment}{code_block}"
    CODE_LINE_PRINT = f"{comment}{code_print}"
    BLOCK_LINE_MARKERS = (CODE_LINE_BLOCK_END, CODE_LINE_PRINT, CODE_LINE_PREFIX)

    local_vars = dict()
    global_vars = globals()

    # Let generator code import modules located next to the target file.
    cwd = str(filepath.parent)
    if cwd not in sys.path:
        sys.path.append(cwd)

    out = io.StringIO()
    local_vars["_codegen_"] = lambda *args, **kwargs: print(
        *args, **kwargs, file=out
    )
    local_vars["_filepath_"] = str(filepath)

    if global_py:
        global_py_path = Path(global_py)
        if global_py_path.exists():
            basename = global_py_path.stem.replace('.', '_')
            import_module(name=basename, location=str(global_py_path), import_all=True)

    if include_py:
        # 'example.c.py' is the documented companion of 'example.c';
        # 'example.py' is still accepted for backward compatibility.
        companions = (
            filepath.with_name(filepath.name + '.py'),
            filepath.with_suffix('.py'),
        )
        for pyfile in companions:
            # A .py target must never be executed as its own companion.
            if pyfile != filepath and pyfile.exists():
                local_vars["_pyfile_"] = str(pyfile)
                exec(pyfile.read_text(), global_vars, local_vars)
                break

    codegen_block_count = 0
    next_state = OUT_CODE_BLOCK
    block_start_line = None
    content = None
    for ln_idx, ln in enumerate(filepath.read_text().splitlines(keepends=True), start=1):
        lstrip_ln = ln.lstrip()
        # The transition decided on the previous line takes effect now, so the
        # line that opens a block is itself still handled as "outside".
        state = next_state
        # Record the newline style from the first line.
        if ln_idx == 1:
            CodeGenInfo.FileNewLine = extract_newline(ln)[1] or '\n'
        if state == OUT_CODE_BLOCK:
            if lstrip_ln.startswith(CODE_LINE_PREFIX):
                next_state = IN_CODE_BLOCK
                codegen_block_count += 1
                block_start_line = ln_idx
                content = io.StringIO()
                content.write(lstrip_ln[len(CODE_LINE_PREFIX):])
            elif lstrip_ln.startswith(CODE_LINE_PRINT):
                next_state = IN_CODE_BLOCK
                codegen_block_count += 1
                block_start_line = ln_idx
                content = io.StringIO()
                content.write(code_print_to_code(lstrip_ln, CODE_LINE_PRINT, codegen_func='_codegen_'))
            ln, updated = codegen_inline_proc(ln, local_vars, global_vars)
            codegen_block_count += 1 if updated else 0
            out.write(ln)
        elif state == IN_CODE_BLOCK:
            if lstrip_ln.startswith(CODE_LINE_BLOCK_END):
                next_state = OUT_CODE_BLOCK
                # Running the block writes the generated lines to ``out``
                # before the end marker itself is copied below.
                exec(content.getvalue(), global_vars, local_vars)
            elif lstrip_ln.startswith(CODE_LINE_PRINT):
                content.write(code_print_to_code(lstrip_ln, CODE_LINE_PRINT, codegen_func='_codegen_'))
            elif lstrip_ln.startswith(CODE_LINE_PREFIX):
                content.write(lstrip_ln[len(CODE_LINE_PREFIX):])
            # In code block, only the code generation lines are preserved.
            if lstrip_ln.startswith(BLOCK_LINE_MARKERS):
                out.write(ln)
        else:
            assert False, "Invalid state"

    if next_state == IN_CODE_BLOCK:
        # Writing the output now would silently drop everything after the
        # block start without ever running the block.
        raise ValueError(
            f"{filepath}: code generation block starting at line {block_start_line} "
            f"is not closed with '{CODE_LINE_BLOCK_END}'."
        )
    if codegen_block_count == 0:
        return False  # No code generation blocks found.

    generated = out.getvalue()
    if inplace:
        # replace() overwrites a backup left by an earlier run on every platform.
        filepath.replace(filepath.with_suffix(filepath.suffix + ".code-gen.bak"))
        filepath.write_text(generated)
    else:
        codegen_file = filepath.with_suffix(filepath.suffix + ".code-gen")
        codegen_file.write_text(generated)
    return True
def argv_wrapper(argv):
    """
    Parse a ``sys.argv``-style list into keyword arguments for code generation.

    :param argv: The full argument vector; the first item (program name) is skipped.
    :return: A dict with the key ``files`` plus one key per command line option.
    """
    from argparse import ArgumentParser

    cli = ArgumentParser(description="Comment-based code generation utility.")
    cli.add_argument('files', metavar='FILE', type=str, nargs='+', help='Files to process for code generation.')
    # Options are registered in table order, which is also their order in --help.
    option_table = (
        ('--inplace', dict(action='store_true', help='Replace the original file with the generated file.')),
        ('--comment', dict(type=str, default='//', help='Comment string used in the file.')),
        ('--code-symbol', dict(type=str, default='% ', help='Symbol indicating the start of a code generation line.')),
        ('--code-block', dict(type=str, default='$', help='Symbol indicating the end of a code generation block.')),
        ('--code-print', dict(type=str, default='? ', help='Symbol indicating a print statement in the code generation block.')),
        ('--include-py', dict(action='store_true', help='Include a .py file with the same name as the target file for additional context.')),
        ('--global-py', dict(type=str, default=None, help='Optional global variables for the execution context.')),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    parsed = cli.parse_args(argv[1:])
    return vars(parsed)
if __name__ == '__main__':
    if len(sys.argv) > 1:
        argkw = argv_wrapper(sys.argv)
        files = argkw.pop('files', [])
        for fn in files:
            # Each file is processed from inside its own directory; the
            # previous working directory is restored afterwards.
            cwd = os.getcwd()
            fn = Path(fn)
            os.chdir(fn.parent)
            codegen_proc(fn.name, **argkw)
            os.chdir(cwd)
    else:
        # Without arguments, show the module documentation as usage help.
        print(__doc__)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment