epakai · November 3, 2018 14:16
diff --git a/devhelp2epub.py b/devhelp2epub.py
 #!/usr/bin/env python3

 # Generates epub given a .devhelp2 file.
 # Run this to generate epub for all installed devhelp files on a Debian system:
 # for i in $(locate .devhelp2|grep ^\/usr\/share); do ./devhelp2epub.py $i; done
 import argparse, os, hashlib, locale, collections, sys, zipfile, tempfile, shutil
 import xml.etree.ElementTree as ET
 from ebooklib import epub
 import ebooklib
 # Namespace prefix map passed to the ElementTree find()/findall() calls below.
 ns = {'devhelp': 'http://www.devhelp.net/book'}
 # Single module-level book object; main() and add_sub() both add items to it.
 book = epub.EpubBook()

 def main():
     """Convert the devhelp2 book named on the command line into an epub file.

     Parses the .devhelp2 XML, adds the images, the stylesheet and the chapter
     pages found next to it to the module-level ``book``, writes the epub and
     finally post-processes its navigation document with linkify_toc().

     Exits with status 1 when the input has no <chapters> element.
     """
     args = parse_arguments()
     source = os.path.abspath(args.FILE)
     # Take the directory from the absolute path: os.path.dirname() of a bare
     # filename is '' and os.listdir('') fails.
     path = os.path.dirname(source)
     root = ET.parse(source).getroot()

     # Set up book
     # Fall back to the input's base name when the root element carries no
     # title attribute, because the identifier is derived from the title.
     title = root.get('title') or os.path.splitext(os.path.basename(source))[0]
     book.set_identifier(hashlib.md5(title.encode()).hexdigest())
     book.set_title(title)
     # locale.getlocale() yields (None, None) when no locale is configured.
     code = locale.getlocale()[0] or 'en'
     book.set_language(code.split('_', 1)[0])
     book.add_item(epub.EpubNcx())
     book.add_item(epub.EpubNav())

     # add images
     mimetype = {'.jpg': 'image/jpeg', '.jpeg': 'image/jpeg',
                 '.svg': 'image/svg+xml', '.png': 'image/png',
                 '.gif': 'image/gif'}
     for f in os.listdir(path):
         # Lower-case the extension once so that e.g. "LOGO.PNG" is both
         # selected and mapped to a media type.
         extension = os.path.splitext(f)[1].lower()
         if extension not in mimetype:
             continue
         with open(os.path.join(path, f), 'rb') as handle:
             book.add_item(epub.EpubItem(file_name=f,
                                         content=handle.read(),
                                         media_type=mimetype[extension]))

     # add stylesheet
     style_path = os.path.join(path, "style.css")
     try:
         with open(style_path, "r") as handle:
             style = handle.read()
     except FileNotFoundError:
         # Chapters link to Styles/style.css, so ship an empty sheet instead
         # of aborting the whole conversion.
         print('Missing file: ' + style_path, file=sys.stderr)
         print('Continuing epub generation', file=sys.stderr)
         style = ''
     style_item = epub.EpubItem(uid="style", file_name="Styles/style.css", media_type="text/css", content=style)
     book.add_item(style_item)

     # add all chapters
     chapters = root.find('devhelp:chapters', ns)
     if chapters is None:
         print('Could not find <chapters> Element in {0}. Exiting.'.format(source), file=sys.stderr)
         sys.exit(1)
     # add_sub() returns None for entries it had to skip; keep only real items.
     subs = [entry
             for entry in (add_sub(sub, path) for sub in chapters.findall('devhelp:sub', ns))
             if entry is not None]

     # add table of contents
     book.toc = subs

     # add spine (ordering for all the chapter files)
     # Only keep EpubHtml items in the spine, (Filters out epub.Link items)
     spine_list = [item for item in flatten(subs) if isinstance(item, epub.EpubHtml)]
     book.spine = ['nav'] + spine_list

     # write out epub
     output = args.output
     if output is None:
         # Swap only the extension; a plain str.replace() would also rewrite
         # "devhelp2" occurring elsewhere in the name.
         output = os.path.splitext(os.path.basename(source))[0] + '.epub'
     epub.write_epub(output, book, {})

     # fix TOC items that become <span> instead of <a>
     # this may be a bug in how the book.toc list is created
     linkify_toc(output, find_recursive(chapters, 'sub'))

 def find_recursive(node, element):
     """Yield every descendant of *node* with the devhelp tag *element*.

     Matches are produced parent first, followed by that parent's own
     matching descendants, before moving on to the next sibling.
     """
     qualified_tag = 'devhelp:' + element
     for match in node.findall(qualified_tag, ns):
         yield match
         yield from find_recursive(match, element)

 def flatten(l):
     """Yield the leaves of an arbitrarily nested iterable, depth first.

     Strings and bytes are treated as leaves rather than being split into
     characters; any other iterable element is descended into.
     """
     # collections.Iterable was removed in Python 3.10; the ABC lives in
     # collections.abc. Imported locally so the block does not rely on the
     # submodule having been loaded by someone else.
     from collections.abc import Iterable

     for el in l:
         if isinstance(el, Iterable) and not isinstance(el, (str, bytes)):
             yield from flatten(el)
         else:
             yield el

 def add_sub(parent, path):
     """Turn a devhelp <sub> element (and its nested <sub>s) into TOC items.

     parent: the <sub> element; its ``link`` and ``name`` attributes are read.
     path:   directory that the ``link`` attributes are joined onto.

     A link containing '#' becomes an epub.Link. Any other link is read from
     disk, wrapped in an epub.EpubHtml and added to the module-level ``book``.

     Returns the item itself when there are no usable children, otherwise
     ``[item, children]``. Returns None (after a message on stderr) when the
     entry has no link or its file is missing.
     """
     link = parent.get('link')
     name = parent.get('name')
     if link is None:
         # Without a link there is nothing to point the entry at.
         print('Missing link attribute on <sub>: ' + repr(name), file=sys.stderr)
         print('Continuing epub generation', file=sys.stderr)
         return None

     if '#' in link:
         sub = epub.Link(href=link, title=name, uid=link.split('#', 1)[1])
     else:
         source = os.path.join(path, link)
         # isfile() rather than exists(): a directory cannot be read as a page.
         if not os.path.isfile(source):
             print('Missing file: ' + source, file=sys.stderr)
             print('Continuing epub generation', file=sys.stderr)
             return None
         sub = epub.EpubHtml(title=name, file_name=link)
         with open(source, 'r') as handle:
             sub.content = handle.read()
         sub.add_link(href='Styles/style.css', rel='stylesheet', type='text/css')
         book.add_item(sub)

     # Skipped children come back as None; leave them out of the nested list.
     subs = [child_item
             for child_item in (add_sub(child, path) for child in parent.findall('devhelp:sub', ns))
             if child_item is not None]
     if not subs:
         return sub
     return [sub, subs]

 def parse_arguments():
     """Parse the command line and return the resulting namespace.

     The namespace carries ``FILE`` (the input devhelp2 filename, required)
     and ``output`` (the value of -o/--output, or None when not given).
     """
     cli = argparse.ArgumentParser(description='Convert devhelp files to an epub format.')
     output_help = 'Output epub filename (default: <input_basename>.epub)'
     cli.add_argument('-o', '--output', dest='output', help=output_help)
     cli.add_argument('FILE', help='Input devhelp2 filename')
     parsed = cli.parse_args()
     return parsed

 def linkify_toc(epub_file, subs_list):
     """Turn plain <span> entries in the epub's nav document into links.

     epub_file: path of the epub archive; it is rewritten in place.
     subs_list: iterable of objects exposing ``.get("name")`` and
                ``.get("link")``; entries lacking either value are ignored.

     Every ``<span>NAME</span>`` in EPUB/nav.xhtml is replaced by
     ``<a href="LINK">NAME</a>``. All archive members keep their original
     order. Raises KeyError if the archive has no EPUB/nav.xhtml.
     """
     from html import escape

     filename = 'EPUB/nav.xhtml'
     # convert subs list to a dictionary
     sub_dict = {}
     for sub in subs_list:
         name = sub.get("name")
         link = sub.get("link")
         if name is None or link is None:
             continue
         sub_dict[name] = link

     # generate a temp file next to the epub so the final swap is a rename on
     # the same filesystem
     tmpfd, tmpname = tempfile.mkstemp(suffix='.epub',
                                       dir=os.path.dirname(os.path.abspath(epub_file)))
     os.close(tmpfd)
     try:
         with zipfile.ZipFile(epub_file, 'r') as zin, zipfile.ZipFile(tmpname, 'w') as zout:
             zout.comment = zin.comment  # preserve the comment
             # read file to be modified
             nav = zin.read(filename).decode("utf-8")
             for name, link in sub_dict.items():
                 # The nav document holds XML-escaped text, so escape the name
                 # before searching and the link before embedding it.
                 text = escape(name, quote=False)
                 nav = nav.replace('<span>' + text + '</span>',
                                   '<a href="' + escape(link, quote=True) + '">' + text + '</a>')

             # Copy every member in its original order, swapping in the new
             # nav data at the position of the old one.
             for item in zin.infolist():
                 if item.filename == filename:
                     zout.writestr(item, nav.encode("utf-8"))
                 else:
                     zout.writestr(item, zin.read(item.filename))

         # mkstemp creates the file owner-only; carry over the epub's mode.
         shutil.copymode(epub_file, tmpname)
         # replace with the temp archive
         os.replace(tmpname, epub_file)
     except BaseException:
         # Do not leave the temp archive behind when anything goes wrong.
         if os.path.exists(tmpname):
             os.remove(tmpname)
         raise

 # Run the conversion only when this file is executed as a script.
 if __name__ == "__main__":
     main()
	#!/usr/bin/env python3

	# Generates epub given a .devhelp2 file.
	# Run this to generate epub for all installed devhelp files on a Debian system:
	# for i in $(locate .devhelp2|grep ^\/usr\/share); do ./devhelp2epub.py $i; done
	import argparse, os, hashlib, locale, collections, sys, zipfile, tempfile, shutil
	import xml.etree.ElementTree as ET
	from ebooklib import epub
	import ebooklib
	# Namespace prefix map passed to the ElementTree find()/findall() calls below.
	ns = {'devhelp': 'http://www.devhelp.net/book'}
	# Single module-level book object; main() and add_sub() both add items to it.
	book = epub.EpubBook()

	def main():
	    """Convert the devhelp2 book named on the command line into an epub file.

	    Parses the .devhelp2 XML, adds the images, the stylesheet and the chapter
	    pages found next to it to the module-level ``book``, writes the epub and
	    finally post-processes its navigation document with linkify_toc().

	    Exits with status 1 when the input has no <chapters> element.
	    """
	    args = parse_arguments()
	    source = os.path.abspath(args.FILE)
	    # Take the directory from the absolute path: os.path.dirname() of a bare
	    # filename is '' and os.listdir('') fails.
	    path = os.path.dirname(source)
	    root = ET.parse(source).getroot()

	    # Set up book
	    # Fall back to the input's base name when the root element carries no
	    # title attribute, because the identifier is derived from the title.
	    title = root.get('title') or os.path.splitext(os.path.basename(source))[0]
	    book.set_identifier(hashlib.md5(title.encode()).hexdigest())
	    book.set_title(title)
	    # locale.getlocale() yields (None, None) when no locale is configured.
	    code = locale.getlocale()[0] or 'en'
	    book.set_language(code.split('_', 1)[0])
	    book.add_item(epub.EpubNcx())
	    book.add_item(epub.EpubNav())

	    # add images
	    mimetype = {'.jpg': 'image/jpeg', '.jpeg': 'image/jpeg',
	                '.svg': 'image/svg+xml', '.png': 'image/png',
	                '.gif': 'image/gif'}
	    for f in os.listdir(path):
	        # Lower-case the extension once so that e.g. "LOGO.PNG" is both
	        # selected and mapped to a media type.
	        extension = os.path.splitext(f)[1].lower()
	        if extension not in mimetype:
	            continue
	        with open(os.path.join(path, f), 'rb') as handle:
	            book.add_item(epub.EpubItem(file_name=f,
	                                        content=handle.read(),
	                                        media_type=mimetype[extension]))

	    # add stylesheet
	    style_path = os.path.join(path, "style.css")
	    try:
	        with open(style_path, "r") as handle:
	            style = handle.read()
	    except FileNotFoundError:
	        # Chapters link to Styles/style.css, so ship an empty sheet instead
	        # of aborting the whole conversion.
	        print('Missing file: ' + style_path, file=sys.stderr)
	        print('Continuing epub generation', file=sys.stderr)
	        style = ''
	    style_item = epub.EpubItem(uid="style", file_name="Styles/style.css", media_type="text/css", content=style)
	    book.add_item(style_item)

	    # add all chapters
	    chapters = root.find('devhelp:chapters', ns)
	    if chapters is None:
	        print('Could not find <chapters> Element in {0}. Exiting.'.format(source), file=sys.stderr)
	        sys.exit(1)
	    # add_sub() returns None for entries it had to skip; keep only real items.
	    subs = [entry
	            for entry in (add_sub(sub, path) for sub in chapters.findall('devhelp:sub', ns))
	            if entry is not None]

	    # add table of contents
	    book.toc = subs

	    # add spine (ordering for all the chapter files)
	    # Only keep EpubHtml items in the spine, (Filters out epub.Link items)
	    spine_list = [item for item in flatten(subs) if isinstance(item, epub.EpubHtml)]
	    book.spine = ['nav'] + spine_list

	    # write out epub
	    output = args.output
	    if output is None:
	        # Swap only the extension; a plain str.replace() would also rewrite
	        # "devhelp2" occurring elsewhere in the name.
	        output = os.path.splitext(os.path.basename(source))[0] + '.epub'
	    epub.write_epub(output, book, {})

	    # fix TOC items that become <span> instead of <a>
	    # this may be a bug in how the book.toc list is created
	    linkify_toc(output, find_recursive(chapters, 'sub'))

	def find_recursive(node, element):
	    """Yield every descendant of *node* with the devhelp tag *element*.

	    Matches are produced parent first, followed by that parent's own
	    matching descendants, before moving on to the next sibling.
	    """
	    for item in node.findall('devhelp:' + element, ns):
	        yield item
	        yield from find_recursive(item, element)

	def flatten(l):
	    """Yield the leaves of an arbitrarily nested iterable, depth first.

	    Strings and bytes are treated as leaves rather than being split into
	    characters; any other iterable element is descended into.
	    """
	    # collections.Iterable was removed in Python 3.10; the ABC lives in
	    # collections.abc. Imported locally so the block does not rely on the
	    # submodule having been loaded by someone else.
	    from collections.abc import Iterable

	    for el in l:
	        if isinstance(el, Iterable) and not isinstance(el, (str, bytes)):
	            yield from flatten(el)
	        else:
	            yield el

	def add_sub(parent, path):
	    """Turn a devhelp <sub> element (and its nested <sub>s) into TOC items.

	    parent: the <sub> element; its ``link`` and ``name`` attributes are read.
	    path:   directory that the ``link`` attributes are joined onto.

	    A link containing '#' becomes an epub.Link. Any other link is read from
	    disk, wrapped in an epub.EpubHtml and added to the module-level ``book``.

	    Returns the item itself when there are no usable children, otherwise
	    ``[item, children]``. Returns None (after a message on stderr) when the
	    entry has no link or its file is missing.
	    """
	    link = parent.get('link')
	    name = parent.get('name')
	    if link is None:
	        # Without a link there is nothing to point the entry at.
	        print('Missing link attribute on <sub>: ' + repr(name), file=sys.stderr)
	        print('Continuing epub generation', file=sys.stderr)
	        return None

	    if '#' in link:
	        sub = epub.Link(href=link, title=name, uid=link.split('#', 1)[1])
	    else:
	        source = os.path.join(path, link)
	        # isfile() rather than exists(): a directory cannot be read as a page.
	        if not os.path.isfile(source):
	            print('Missing file: ' + source, file=sys.stderr)
	            print('Continuing epub generation', file=sys.stderr)
	            return None
	        sub = epub.EpubHtml(title=name, file_name=link)
	        with open(source, 'r') as handle:
	            sub.content = handle.read()
	        sub.add_link(href='Styles/style.css', rel='stylesheet', type='text/css')
	        book.add_item(sub)

	    # Skipped children come back as None; leave them out of the nested list.
	    subs = [child_item
	            for child_item in (add_sub(child, path) for child in parent.findall('devhelp:sub', ns))
	            if child_item is not None]
	    if not subs:
	        return sub
	    return [sub, subs]

	def parse_arguments():
	    """Parse the command line and return the resulting namespace.

	    The namespace carries ``FILE`` (the input devhelp2 filename, required)
	    and ``output`` (the value of -o/--output, or None when not given).
	    """
	    parser = argparse.ArgumentParser(
	        description='Convert devhelp files to an epub format.')
	    parser.add_argument('-o', '--output', dest='output', action='store',
	                        help='Output epub filename (default: <input_basename>.epub)')
	    parser.add_argument('FILE', action='store',
	                        help='Input devhelp2 filename')
	    return parser.parse_args()

	def linkify_toc(epub_file, subs_list):
	    """Turn plain <span> entries in the epub's nav document into links.

	    epub_file: path of the epub archive; it is rewritten in place.
	    subs_list: iterable of objects exposing ``.get("name")`` and
	               ``.get("link")``; entries lacking either value are ignored.

	    Every ``<span>NAME</span>`` in EPUB/nav.xhtml is replaced by
	    ``<a href="LINK">NAME</a>``. All archive members keep their original
	    order. Raises KeyError if the archive has no EPUB/nav.xhtml.
	    """
	    from html import escape

	    filename = 'EPUB/nav.xhtml'
	    # convert subs list to a dictionary
	    sub_dict = {}
	    for sub in subs_list:
	        name = sub.get("name")
	        link = sub.get("link")
	        if name is None or link is None:
	            continue
	        sub_dict[name] = link

	    # generate a temp file next to the epub so the final swap is a rename on
	    # the same filesystem
	    tmpfd, tmpname = tempfile.mkstemp(suffix='.epub',
	                                      dir=os.path.dirname(os.path.abspath(epub_file)))
	    os.close(tmpfd)
	    try:
	        with zipfile.ZipFile(epub_file, 'r') as zin, zipfile.ZipFile(tmpname, 'w') as zout:
	            zout.comment = zin.comment  # preserve the comment
	            # read file to be modified
	            nav = zin.read(filename).decode("utf-8")
	            for name, link in sub_dict.items():
	                # The nav document holds XML-escaped text, so escape the name
	                # before searching and the link before embedding it.
	                text = escape(name, quote=False)
	                nav = nav.replace('<span>' + text + '</span>',
	                                  '<a href="' + escape(link, quote=True) + '">' + text + '</a>')

	            # Copy every member in its original order, swapping in the new
	            # nav data at the position of the old one.
	            for item in zin.infolist():
	                if item.filename == filename:
	                    zout.writestr(item, nav.encode("utf-8"))
	                else:
	                    zout.writestr(item, zin.read(item.filename))

	        # mkstemp creates the file owner-only; carry over the epub's mode.
	        shutil.copymode(epub_file, tmpname)
	        # replace with the temp archive
	        os.replace(tmpname, epub_file)
	    except BaseException:
	        # Do not leave the temp archive behind when anything goes wrong.
	        if os.path.exists(tmpname):
	            os.remove(tmpname)
	        raise

	# Run the conversion only when this file is executed as a script.
	if __name__ == "__main__":
	    main()
No results found