Last active
January 1, 2026 04:18
-
-
Save MoserMichael/249a41f7fc53ddd275429694685711cc to your computer and use it in GitHub Desktop.
llm-talk-from-html-to-markdown
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # script for converting LLM saved talks to markdown. | |
| # setup: | |
| # | |
| # Installation: | |
| # | |
| # python3 -m venv .venv | |
| # source .venv/bin/activate | |
| # pip3 install html-to-markdown | |
| # | |
| # repeated usage: | |
| # source .venv/bin/activate | |
| # | |
| # python conv.py -d dir-name-that-contains-talks-to-llm | |
| import argparse | |
| import sys | |
| import pathlib | |
| from html_to_markdown import convert | |
def parse_arguments():
    """Parse the command line and return the resulting namespace.

    Recognised options:
      -d / --dirname  directory whose HTML files should be converted
      -f / --fname    single HTML file to convert

    Returns:
        argparse.Namespace with the attributes ``dirname`` and ``fname``;
        an option that was not supplied is ``None``.

    Exits with status 1 when neither option carries a usable value.
    """
    usage = (
        "Convert files from html to markdown.\n"
        "Useful when dealing with saved chats to an llm.\n"
    )
    parser = argparse.ArgumentParser(
        description=usage,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "-d",
        "--dirname",
        help="directory name",
        type=str,
        required=False,
    )
    parser.add_argument(
        "-f",
        "--fname",
        help="file name",
        type=str,
        required=False,
    )
    ret = parser.parse_args()

    # argparse leaves an omitted option as None, not "", so test truthiness:
    # comparing against "" would let an argument-less invocation slip through
    # and the script would silently do nothing.
    if not (ret.dirname or ret.fname):
        print("Error: either -d or -f arguments required")
        sys.exit(1)
    return ret
def filter_out_images(md_text):
    """Return md_text without the lines that begin with "![SVG Image]".

    The text is split on newline characters, matching lines are dropped,
    and the surviving lines are joined back with newline characters.
    """
    kept_rows = [
        row for row in md_text.split('\n')
        if not row.startswith("![SVG Image]")
    ]
    return '\n'.join(kept_rows)
def process_file(fname):
    """Convert one HTML file to markdown, written next to the input.

    The output path is the input path with its suffix replaced by ".md".
    Lines rejected by filter_out_images are removed from the converted
    text before it is written.

    Args:
        fname: path of the HTML file to convert.
    """
    md_name = str(pathlib.Path(fname).with_suffix(".md"))
    print(f"Converting {fname} to {md_name}")

    # Pin the encoding: the locale default is not UTF-8 on every platform,
    # and relying on it makes reading or writing fail on non-ASCII text.
    with open(fname, 'r', encoding="utf-8") as htm_file:
        html_text = htm_file.read()

    md_text = convert(html_text)

    with open(md_name, 'w', encoding="utf-8") as ofile:
        ofile.write(filter_out_images(md_text))
def process_dir(dname):
    """Convert every HTML file located directly inside a directory.

    A regular file is processed when its suffix is ".htm" or ".html"
    (compared case-insensitively). Subdirectories are not descended into.
    Files are handled in sorted path order so runs are reproducible.

    Args:
        dname: path of the directory to scan.
    """
    # The former glob pattern '*.htm?' needed exactly one character after
    # ".htm", so plain ".htm" files were skipped while unrelated suffixes
    # such as ".htmx" were picked up. Check the suffix explicitly instead.
    wanted_suffixes = (".htm", ".html")
    for entry in sorted(pathlib.Path(dname).iterdir()):
        if entry.is_file() and entry.suffix.lower() in wanted_suffixes:
            process_file(str(entry))
def do_it():
    """Entry point: parse the command line and run the requested conversions.

    When a directory is given, it is resolved to an absolute path and handed
    to process_dir. When a file is given, it is resolved and handed to
    process_file. Both are handled if both options are present.
    """
    options = parse_arguments()

    directory = options.dirname
    if directory:
        process_dir(str(pathlib.Path(directory).resolve()))

    single_file = options.fname
    if single_file:
        process_file(str(pathlib.Path(single_file).resolve()))


# Run the conversion only when the file is executed as a script.
if __name__ == "__main__":
    do_it()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment