Skip to content

Instantly share code, notes, and snippets.

@MoserMichael
Last active January 1, 2026 04:18
Show Gist options
  • Select an option

  • Save MoserMichael/249a41f7fc53ddd275429694685711cc to your computer and use it in GitHub Desktop.

Select an option

Save MoserMichael/249a41f7fc53ddd275429694685711cc to your computer and use it in GitHub Desktop.
llm-talk-from-html-to-markdown
# Script for converting saved LLM chats from HTML to Markdown.
#
# One-time setup:
#
# python3 -m venv .venv
# source .venv/bin/activate
# pip3 install html-to-markdown
#
# Repeated usage:
# source .venv/bin/activate
#
# Convert every HTML file in a directory:
# python conv.py -d dir-name-that-contains-talks-to-llm
#
# Convert a single file:
# python conv.py -f file-name.html
import argparse
import sys
import pathlib
from html_to_markdown import convert
def parse_arguments():
    """Parse the command line and return the resulting namespace.

    Two optional string options are recognised: ``-d/--dirname`` and
    ``-f/--fname``. At least one of them must be supplied with a
    non-empty value; otherwise an error message is printed and the
    process exits with status 1.

    Returns:
        The ``argparse.Namespace`` with ``dirname`` and ``fname``
        attributes. An option that was not supplied is ``None``.

    Raises:
        SystemExit: if neither option carries a usable value (status 1),
            or if argparse itself rejects the command line.
    """
    usage = """Convert files from html to markdown.
Useful when dealing with saved chats to an llm.
"""
    parser = argparse.ArgumentParser(
        description=usage,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "-d",
        "--dirname",
        help="directory name",
        type=str,
        required=False,
    )
    parser.add_argument(
        "-f",
        "--fname",
        help="file name",
        type=str,
        required=False,
    )
    ret = parser.parse_args()

    # argparse leaves an omitted option as None, so comparing against ""
    # never detects the "nothing supplied" case. Truthiness covers both
    # None and the empty string.
    if not ret.dirname and not ret.fname:
        print("Error: either -d or -f arguments required")
        sys.exit(1)
    return ret
def filter_out_images(md_text):
    """Return *md_text* with every ``![SVG Image]`` line removed.

    The text is split on newline characters; each line that begins with
    the literal ``![SVG Image]`` is dropped, and the remaining lines are
    joined back together with newlines. Lines where the marker is
    preceded by anything (including whitespace) are kept.
    """
    kept_lines = [
        line
        for line in md_text.split('\n')
        if not line.startswith("![SVG Image]")
    ]
    return '\n'.join(kept_lines)
def process_file(fname):
    """Convert one HTML file to Markdown.

    The output path is *fname* with its suffix replaced by ``.md``. The
    HTML text is passed through ``html_to_markdown.convert``, then
    through ``filter_out_images``, and the result is written to the
    output path. A progress line is printed before the conversion.

    Args:
        fname: path of the HTML file to convert.

    Raises:
        OSError: if the input cannot be read or the output cannot be
            written.
        UnicodeDecodeError: if the input is not valid UTF-8.
    """
    md_name = str(pathlib.Path(fname).with_suffix(".md"))
    print(f"Converting {fname} to {md_name}")

    # Use an explicit encoding rather than the locale default, which is
    # not UTF-8 on every platform and would then mis-decode or reject
    # the input. NOTE(review): assumes the saved pages are UTF-8.
    with open(fname, 'r', encoding="utf-8") as htm_file:
        html_text = htm_file.read()

    md_text = convert(html_text)

    with open(md_name, 'w', encoding="utf-8") as ofile:
        ofile.write(filter_out_images(md_text))
def process_dir(dname):
    """Convert every HTML file found directly inside *dname*.

    Files are matched by name against ``*.htm`` and ``*.htm?``;
    subdirectories are not searched. Each matching regular file is
    handed to ``process_file``.

    Args:
        dname: path of the directory to scan.
    """
    directory = pathlib.Path(dname)

    # In glob syntax "?" stands for exactly one character, so "*.htm?"
    # alone matches "page.html" but skips "page.htm". Include both
    # patterns. Sorting makes the processing order deterministic.
    candidates = sorted([*directory.glob('*.htm'), *directory.glob('*.htm?')])

    for candidate in candidates:
        if candidate.is_file():
            process_file(str(candidate))
def do_it():
    """Run the converter according to the command-line options.

    If a directory was given, its absolute path is passed to
    ``process_dir``. If a file was given, its absolute path is passed
    to ``process_file``. When both are given, the directory is handled
    first and the file second.
    """
    options = parse_arguments()

    directory = options.dirname
    if directory:
        process_dir(str(pathlib.Path(directory).resolve()))

    single_file = options.fname
    if single_file:
        process_file(str(pathlib.Path(single_file).resolve()))
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    do_it()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment