Skip to content

Instantly share code, notes, and snippets.

@epakai
Created November 3, 2018 14:16
Show Gist options
  • Select an option

  • Save epakai/23120c1a750eeddef5a7f29924018c90 to your computer and use it in GitHub Desktop.

Select an option

Save epakai/23120c1a750eeddef5a7f29924018c90 to your computer and use it in GitHub Desktop.
Generate epub files from Devhelp documentation
#!/usr/bin/env python3
# Generates epub given a .devhelp2 file.
# Run this to generate epub for all installed devhelp files on a Debian system:
# for i in $(locate .devhelp2|grep ^\/usr\/share); do ./devhelp2epub.py $i; done
import argparse, os, hashlib, locale, collections, sys, zipfile, tempfile, shutil
import xml.etree.ElementTree as ET
from ebooklib import epub
import ebooklib
# Namespace map passed to ElementTree find()/findall() so that the
# 'devhelp:' prefix resolves to the Devhelp book namespace.
ns = {'devhelp': 'http://www.devhelp.net/book'}
# Module-level book instance; main() configures and writes it, and
# add_sub() adds chapter items to it.
book = epub.EpubBook()
def main():
    """Convert the .devhelp2 file named on the command line into an epub.

    Reads the devhelp index, registers the images and stylesheet found next
    to it, adds every <sub> chapter via add_sub(), writes the epub and then
    post-processes its navigation document with linkify_toc().

    If the index has no <chapters> element, a message is printed to stderr
    and nothing is written.
    """
    args = parse_arguments()

    # Resolve the input once. os.path.dirname() of a bare filename is '',
    # and os.listdir('') raises, so always work from the absolute path.
    devhelp_path = os.path.abspath(args.FILE)
    doc_dir = os.path.dirname(devhelp_path)
    root = ET.parse(devhelp_path).getroot()

    # Set up book
    # Fall back to the file's base name when the index has no title attribute.
    title = root.get('title') or os.path.splitext(
        os.path.basename(devhelp_path))[0]
    # md5 is only used to derive a stable identifier from the title.
    book.set_identifier(hashlib.md5(title.encode()).hexdigest())
    book.set_title(title)
    # getlocale() yields (None, None) in the C/POSIX locale; default to 'en'.
    code = locale.getlocale()[0] or 'en'
    book.set_language(code.split('_', 1)[0])
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # add images
    mimetypes = {'.jpg': 'image/jpeg', '.svg': 'image/svg+xml',
                 '.png': 'image/png', '.gif': 'image/gif'}
    for f in os.listdir(doc_dir):
        # Lower-case the extension for the lookup so 'LOGO.PNG' is accepted
        # instead of raising KeyError.
        media_type = mimetypes.get(os.path.splitext(f)[1].lower())
        image = os.path.join(doc_dir, f)
        if media_type is None or not os.path.isfile(image):
            continue
        with open(image, 'rb') as image_file:
            book.add_item(epub.EpubItem(file_name=f,
                                        content=image_file.read(),
                                        media_type=media_type))

    # add stylesheet
    style_path = os.path.join(doc_dir, "style.css")
    if os.path.exists(style_path):
        with open(style_path, "r") as style_file:
            style = style_file.read()
        style_item = epub.EpubItem(uid="style", file_name="Styles/style.css",
                                   media_type="text/css", content=style)
        book.add_item(style_item)
    else:
        # Same best-effort policy add_sub() applies to missing chapter files.
        print('Missing file: ' + style_path, file=sys.stderr)
        print('Continuing epub generation', file=sys.stderr)

    # add all chapters
    chapters = root.find('devhelp:chapters', ns)
    if chapters is None:
        print('Could not find <chapters> Element in {0}. Exiting.'.format(
            devhelp_path), file=sys.stderr)
        return
    subs = [add_sub(sub, doc_dir)
            for sub in chapters.findall('devhelp:sub', ns)]

    # add table of contents
    book.toc = subs

    # add spine (ordering for all the chapter files)
    # Only keep EpubHtml items in the spine, (Filters out epub.Link items
    # and the None placeholders add_sub() returns for missing files)
    spine_list = [sub for sub in flatten(subs)
                  if isinstance(sub, ebooklib.epub.EpubHtml)]
    book.spine = ['nav'] + spine_list

    # write out epub
    if args.output is None:
        # Swap only the extension; str.replace() would rewrite every
        # 'devhelp2' in the name and leave other extensions untouched.
        args.output = os.path.splitext(
            os.path.basename(devhelp_path))[0] + '.epub'
    epub.write_epub(args.output, book, {})

    # fix TOC items that become <span> instead of <a>
    # this may be a bug in how the book.toc list is created
    top_level_subs = find_recursive(chapters, 'sub')
    linkify_toc(args.output, top_level_subs)
def find_recursive(node, element):
    """Yield every descendant of *node* with the devhelp tag *element*.

    Matching children are yielded in document order, each one followed
    immediately by its own matching descendants (pre-order traversal).
    """
    tag = 'devhelp:' + element
    for match in node.findall(tag, ns):
        yield match
        yield from find_recursive(match, element)
def flatten(l):
    """Lazily yield the leaf items of an arbitrarily nested iterable.

    Strings and bytes are treated as leaves rather than being split into
    characters. Non-iterable items (including None) are yielded unchanged.
    """
    # collections.Iterable was removed in Python 3.10; the ABC lives in
    # collections.abc. Imported locally so the block is self-contained.
    from collections.abc import Iterable

    for el in l:
        if isinstance(el, Iterable) and not isinstance(el, (str, bytes)):
            yield from flatten(el)
        else:
            yield el
def add_sub(parent, path):
    """Build the TOC entry for one devhelp <sub> element and its children.

    parent -- the <sub> element; its 'link' and 'name' attributes are read.
    path   -- directory that the element's 'link' is resolved against.

    A link containing '#' becomes an epub.Link. Any other link is loaded
    from disk as an epub.EpubHtml chapter and added to the module-level
    book. Nested <sub> elements are processed recursively.

    Returns the entry itself when the element has no <sub> children,
    otherwise the two-item list [entry, children]. Returns None, after
    reporting on stderr, when the linked file does not exist; in that case
    the element's children are not processed.
    """
    link = parent.get('link')
    name = parent.get('name')

    if '#' in link:
        sub = epub.Link(href=link, title=name, uid=link.split('#', 1)[1])
    else:
        source = os.path.join(path, link)
        if not os.path.exists(source):
            print('Missing file: ' + source, file=sys.stderr)
            print('Continuing epub generation', file=sys.stderr)
            return None
        sub = epub.EpubHtml(title=name, file_name=link)
        # Context manager so the handle is closed promptly; a large
        # documentation set opens one file per chapter.
        with open(source, 'r') as html_file:
            sub.content = html_file.read()
        sub.add_link(href='Styles/style.css', rel='stylesheet', type='text/css')
        book.add_item(sub)

    subs = [add_sub(child, path) for child in parent.findall('devhelp:sub', ns)]
    if not subs:
        return sub
    return [sub, subs]
def parse_arguments():
    """Parse the command line and return the resulting namespace.

    The namespace carries FILE (the input devhelp2 filename) and output
    (the epub filename given with -o/--output, or None when omitted).
    """
    arg_parser = argparse.ArgumentParser(
        description='Convert devhelp files to an epub format.',
    )
    output_help = 'Output epub filename (default: <input_basename>.epub)'
    arg_parser.add_argument('-o', '--output', dest='output', help=output_help)
    arg_parser.add_argument('FILE', help='Input devhelp2 filename')
    return arg_parser.parse_args()
def linkify_toc(epub_file, subs_list):
    """Turn plain <span> TOC entries in an epub's nav document into links.

    epub_file -- path of the epub archive to rewrite in place.
    subs_list -- iterable of objects exposing .get("name") and .get("link")
                 (the devhelp <sub> elements); entries lacking either
                 value are skipped.

    Every '<span>NAME</span>' in EPUB/nav.xhtml is replaced by
    '<a href="LINK">NAME</a>'. The archive is rebuilt into a temporary file
    in the same directory and then swapped into place, so the original is
    left untouched if anything fails part-way. All other archive members
    and the archive comment are copied unchanged. An archive without
    EPUB/nav.xhtml is copied as-is.
    """
    # Local import: only this function needs XML escaping.
    from xml.sax.saxutils import escape

    filename = 'EPUB/nav.xhtml'

    # Map each serialized <span> to its <a> replacement. Names are escaped
    # because the nav document stores '&', '<' and '>' as entities, so raw
    # names would never match; links are escaped so that the generated
    # attribute stays well-formed.
    replacements = {}
    for sub in subs_list:
        name = sub.get("name")
        link = sub.get("link")
        if name is None or link is None:
            continue
        text = escape(name)
        href = escape(link, {'"': '&quot;'})
        replacements['<span>' + text + '</span>'] = (
            '<a href="' + href + '">' + text + '</a>')

    # Create the temp archive beside the target so the final swap is a
    # same-filesystem rename.
    target_dir = os.path.dirname(os.path.abspath(epub_file))
    tmpfd, tmpname = tempfile.mkstemp(suffix='.epub', dir=target_dir)
    os.close(tmpfd)
    try:
        with zipfile.ZipFile(epub_file, 'r') as zin, \
                zipfile.ZipFile(tmpname, 'w') as zout:
            zout.comment = zin.comment  # preserve the comment
            for item in zin.infolist():
                data = zin.read(item.filename)
                if item.filename == filename:
                    nav = data.decode("utf-8")
                    for span, anchor in replacements.items():
                        nav = nav.replace(span, anchor)
                    data = nav.encode("utf-8")
                # Passing the ZipInfo keeps each member's name, timestamp
                # and compression type.
                zout.writestr(item, data)
        # mkstemp() creates the file with owner-only permissions; carry
        # over the original epub's mode before replacing it.
        shutil.copymode(epub_file, tmpname)
        os.replace(tmpname, epub_file)
    finally:
        # Only still present when something above failed.
        if os.path.exists(tmpname):
            os.remove(tmpname)
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment