Created
November 3, 2018 14:16
-
-
Save epakai/23120c1a750eeddef5a7f29924018c90 to your computer and use it in GitHub Desktop.
Generate epub files from Devhelp documentation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # Generates epub given a .devhelp2 file. | |
| # Run this to generate epub for all installed devhelp files on a Debian system: | |
| # for i in $(locate .devhelp2|grep ^\/usr\/share); do ./devhelp2epub.py $i; done | |
| import argparse, os, hashlib, locale, collections, sys, zipfile, tempfile, shutil | |
| import xml.etree.ElementTree as ET | |
| from ebooklib import epub | |
| import ebooklib | |
# Prefix map handed to ElementTree find()/findall() so that 'devhelp:<tag>'
# resolves to the devhelp book namespace.
ns = dict(devhelp='http://www.devhelp.net/book')

# The epub under construction; main() configures it and add_sub() registers
# chapter files on it.
book = epub.EpubBook()
def main():
    """Convert the devhelp2 book named on the command line into an epub.

    Reads the .devhelp2 index, copies the images and style.css found next to
    it into the module-level ``book``, adds one chapter per <sub> element,
    writes the epub and finally patches the generated navigation document.

    Exits with status 1 when the index has no <chapters> element.

    Raises:
        FileNotFoundError: if the index or its style.css cannot be opened.
    """
    args = parse_arguments()

    # Work from the absolute path: dirname() of a bare file name is '' and
    # os.listdir('') would fail.
    source = os.path.abspath(args.FILE)
    path = os.path.dirname(source)
    root = ET.parse(source).getroot()

    # Without a chapter tree there is nothing to convert, so stop before any
    # output is produced.
    chapters = root.find('devhelp:chapters', ns)
    if chapters is None:
        print('Could not find <chapters> Element in {0}. Exiting.'.format(source), file=sys.stderr)
        sys.exit(1)

    # Set up book
    # Fall back to the file stem when the index carries no title attribute.
    title = root.get('title') or os.path.splitext(os.path.basename(source))[0]
    book.set_identifier(hashlib.md5(title.encode()).hexdigest())
    book.set_title(title)
    # getlocale() reports None when no locale is configured.
    code = locale.getlocale()[0] or 'en'
    book.set_language(code.split('_', 1)[0])
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # add images
    mimetypes = {'.jpg': 'image/jpeg', '.svg': 'image/svg+xml',
                 '.png': 'image/png', '.gif': 'image/gif'}
    for entry in sorted(os.listdir(path)):
        # Compare the lower-cased extension so 'LOGO.PNG' is handled too.
        extension = os.path.splitext(entry)[1].lower()
        if extension not in mimetypes:
            continue
        with open(os.path.join(path, entry), 'rb') as image:
            book.add_item(epub.EpubItem(file_name=entry,
                                        content=image.read(),
                                        media_type=mimetypes[extension]))

    # add stylesheet
    with open(os.path.join(path, "style.css"), "r") as stylesheet:
        style = stylesheet.read()
    style_item = epub.EpubItem(uid="style", file_name="Styles/style.css", media_type="text/css", content=style)
    book.add_item(style_item)

    # add all chapters; add_sub() returns None for entries whose file is
    # missing, and those are left out of the table of contents.
    subs = [add_sub(sub, path) for sub in chapters.findall('devhelp:sub', ns)]
    subs = [sub for sub in subs if sub is not None]

    # add table of contents
    book.toc = subs

    # add spine (ordering for all the chapter files)
    # Only keep EpubHtml items in the spine, (Filters out epub.Link items)
    spine_list = [sub for sub in flatten(subs)
                  if isinstance(sub, ebooklib.epub.EpubHtml)]
    book.spine = ['nav'] + spine_list

    # write out epub
    output = args.output
    if output is None:
        # Swap only the extension; str.replace() would also rewrite a
        # 'devhelp2' that appears inside the base name.
        output = os.path.splitext(os.path.basename(source))[0] + '.epub'
    epub.write_epub(output, book, {})

    # fix TOC items that become <span> instead of <a>
    # this may be a bug in how the book.toc list is created
    linkify_toc(output, find_recursive(chapters, 'sub'))
def find_recursive(node, element):
    """Yield every devhelp ``element`` below ``node``, depth-first.

    Each matching child is yielded before its own matching descendants.
    """
    qualified = 'devhelp:' + element
    for match in node.findall(qualified, ns):
        yield match
        yield from find_recursive(match, element)
def flatten(l):
    """Yield the leaves of an arbitrarily nested iterable, in order.

    Strings and bytes are treated as leaves rather than being split into
    characters.
    """
    # The ABC aliases in the top-level ``collections`` module were removed in
    # Python 3.10; imported locally so this block does not depend on the
    # file's import lines.
    from collections.abc import Iterable

    for el in l:
        if isinstance(el, Iterable) and not isinstance(el, (str, bytes)):
            yield from flatten(el)
        else:
            yield el
def add_sub(parent, path):
    """Build the table-of-contents entry for one <sub> element and its children.

    A link containing a '#' fragment becomes an ``epub.Link``. Any other link
    names an HTML file below ``path``; it is loaded into an ``epub.EpubHtml``
    chapter, given the shared stylesheet link and added to the module-level
    ``book``.

    Args:
        parent: the devhelp <sub> element to convert.
        path: directory that the element's ``link`` attribute is relative to.

    Returns:
        None when the element has no link or its file is missing (its
        children are skipped as well), the entry alone when it has no usable
        children, otherwise ``[entry, children]``.
    """
    link = parent.get('link')
    name = parent.get('name')
    if not link:
        print('Missing link attribute on sub: ' + str(name), file=sys.stderr)
        print('Continuing epub generation', file=sys.stderr)
        return None

    if '#' in link:
        sub = epub.Link(href=link, title=name, uid=link.split('#', 1)[1])
    else:
        source = os.path.join(path, link)
        # isfile() rather than exists(): a directory cannot be read as a chapter.
        if not os.path.isfile(source):
            print('Missing file: ' + source, file=sys.stderr)
            print('Continuing epub generation', file=sys.stderr)
            return None
        sub = epub.EpubHtml(title=name, file_name=link)
        with open(source, 'r') as chapter:
            sub.content = chapter.read()
        sub.add_link(href='Styles/style.css', rel='stylesheet', type='text/css')
        book.add_item(sub)

    # Children that could not be converted come back as None; drop them so
    # the returned structure only holds real entries.
    children = [add_sub(child, path) for child in parent.findall('devhelp:sub', ns)]
    children = [child for child in children if child is not None]
    if not children:
        return sub
    return [sub, children]
def parse_arguments():
    """Parse the command line and return the resulting namespace.

    The namespace carries ``FILE`` (the input devhelp2 file name) and
    ``output`` (the epub file name, or None when -o/--output is not given).
    """
    cli = argparse.ArgumentParser(description='Convert devhelp files to an epub format.')
    cli.add_argument(
        '-o',
        '--output',
        dest='output',
        action='store',
        help='Output epub filename (default: <input_basename>.epub)',
    )
    cli.add_argument(
        'FILE',
        action='store',
        help='Input devhelp2 filename',
    )
    parsed = cli.parse_args()
    return parsed
def linkify_toc(epub_file, subs_list):
    """Turn plain <span> titles in the epub's nav document into links.

    For every element in ``subs_list`` the text ``<span>NAME</span>`` inside
    EPUB/nav.xhtml is replaced by ``<a href="LINK">NAME</a>``, where NAME and
    LINK are the element's ``name`` and ``link`` attributes. The archive is
    rewritten to a temporary file next to ``epub_file`` which then replaces
    the original; entry order and the archive comment are kept.

    Args:
        epub_file: path of the epub archive to patch in place.
        subs_list: iterable of elements exposing ``get('name')`` and
            ``get('link')``.

    Raises:
        KeyError: if the archive has no EPUB/nav.xhtml entry.
    """
    # Imported locally so this block does not depend on the file's import lines.
    from xml.sax.saxutils import escape, quoteattr

    filename = 'EPUB/nav.xhtml'

    # Map each title to its target; a later duplicate title wins. Entries
    # missing either attribute cannot be turned into a link.
    sub_dict = {}
    for sub in subs_list:
        name = sub.get("name")
        link = sub.get("link")
        if name is None or link is None:
            continue
        sub_dict[name] = link

    # Create the temporary archive beside the target so the final replace
    # stays on one filesystem.
    target_dir = os.path.dirname(os.path.abspath(epub_file))
    tmpfd, tmpname = tempfile.mkstemp(suffix='.epub', dir=target_dir)
    os.close(tmpfd)
    try:
        with zipfile.ZipFile(epub_file, 'r') as zin, \
                zipfile.ZipFile(tmpname, 'w') as zout:
            zout.comment = zin.comment  # preserve the comment

            nav = zin.read(filename).decode("utf-8")
            for name, link in sub_dict.items():
                # The serialized nav holds XML-escaped text, so the title has
                # to be escaped the same way to be found; quoteattr() keeps
                # the generated href well-formed.
                text = escape(name)
                nav = nav.replace('<span>' + text + '</span>',
                                  '<a href=' + quoteattr(link) + '>' + text + '</a>')

            # Copy every entry in its original order, substituting the
            # patched navigation document.
            for item in zin.infolist():
                if item.filename == filename:
                    data = nav.encode("utf-8")
                else:
                    data = zin.read(item.filename)
                zout.writestr(item, data)

        # replace with the temp archive
        os.replace(tmpname, epub_file)
    finally:
        # Only still present when something above failed.
        if os.path.exists(tmpname):
            os.remove(tmpname)
# Run the conversion only when executed as a script, not when imported.
if "__main__" == __name__:
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment