Skip to content

Instantly share code, notes, and snippets.

@micsoftvn
Forked from daniel-j/README.md
Created July 10, 2023 08:42
Show Gist options
  • Save micsoftvn/2f29510ba232d81401486960ee8aebe1 to your computer and use it in GitHub Desktop.
Save micsoftvn/2f29510ba232d81401486960ee8aebe1 to your computer and use it in GitHub Desktop.

Revisions

  1. @daniel-j daniel-j revised this gist Jul 5, 2019. 1 changed file with 24 additions and 17 deletions.
    41 changes: 24 additions & 17 deletions images2epub.py
    Original file line number Diff line number Diff line change
    @@ -72,11 +72,12 @@
    IMAGE_TYPES = {
    'jpeg': 'image/jpeg',
    'jpg': 'image/jpeg',
    'png': 'image/png'
    'png': 'image/png',
    'svg': 'image/svg+xml'
    }


    def image2xhtml(imgfile, width, height, title, epubtype = 'bodymatter', lang = 'en'):
    def image2xhtml(imgfile, width, height, title, epubtype='bodymatter', lang='en'):
    content = '''<?xml version="1.0" encoding="utf-8"?>
    <!DOCTYPE html>
    <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" lang="{lang}">
    @@ -96,7 +97,7 @@ def image2xhtml(imgfile, width, height, title, epubtype = 'bodymatter', lang = '
    return content


    def createOpf(title, author, bookId, imageFiles):
    def create_opf(title, author, bookId, imageFiles):
    package_attributes = {'xmlns': NAMESPACES['OPF'],
    'unique-identifier': 'bookId',
    'version': '3.0',
    @@ -139,6 +140,7 @@ def createOpf(title, author, bookId, imageFiles):
    el.text = 'landscape'

    width, height = imagesize.get(path.join(args.directory, imageFiles[0]))
    # width, height = (-1, -1)
    etree.SubElement(metadata, 'meta', {'name': 'original-resolution', 'content': str(width) + 'x' + str(height)})

    # manifest
    @@ -152,11 +154,12 @@ def createOpf(title, author, bookId, imageFiles):

    for i, img in enumerate(imageFiles):
    uid = UID_FORMAT.format(i)
    ext = path.splitext(img)[1][1:]

    imgattrs = {
    'href': 'images/page-' + uid + path.splitext(img)[1],
    'href': 'images/page-' + uid + '.' + ext,
    'id': 'img-' + uid,
    'media-type': IMAGE_TYPES[path.splitext(img)[1][1:]],
    'media-type': IMAGE_TYPES[ext],
    }
    if i == 0:
    imgattrs['properties'] = 'cover-image'
    @@ -201,11 +204,11 @@ def createOpf(title, author, bookId, imageFiles):
    return tree_str


    def createNcx(title, author, bookId):
    def create_ncx(title, author, book_id):
    return '''<?xml version="1.0" encoding="utf-8" standalone="no"?>
    <ncx:ncx xmlns:ncx="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">
    <ncx:head>
    <ncx:meta name="dtb:uid" content="{bookId}"/>
    <ncx:meta name="dtb:uid" content="{book_id}"/>
    <ncx:meta name="dtb:depth" content="1"/>
    <ncx:meta name="dtb:totalPageCount" content="0"/>
    <ncx:meta name="dtb:maxPageNumber" content="0"/>
    @@ -223,14 +226,14 @@ def createNcx(title, author, bookId):
    </ncx:navPoint>
    </ncx:navMap>
    </ncx:ncx>
    '''.format(title=escape(title), author=escape(author), bookId=bookId)
    '''.format(title=escape(title), author=escape(author), book_id=book_id)


    def createNav(title, pageCount):
    pages = [None] * pageCount
    def create_nav(title, page_count):
    pages = [None] * page_count
    for i, page in enumerate(pages):
    uid = UID_FORMAT.format(i)
    pages[i] = ' <li><a href="page-{uid}.xhtml">{pageNumber}</a></li>'.format(uid=uid, pageNumber=i)
    pages[i] = ' <li><a href="page-{uid}.xhtml">{page_number}</a></li>'.format(uid=uid, page_number=i)
    pages.pop(0)

    toc = [(0, title)]
    @@ -302,23 +305,27 @@ def createNav(title, pageCount):
    output.writestr('mimetype', 'application/epub+zip', compress_type=zipfile.ZIP_STORED)
    output.writestr(CONTAINER_PATH, CONTAINER_XML)
    output.writestr(IBOOKS_DISPLAY_OPTIONS_PATH, IBOOKS_DISPLAY_OPTIONS_XML)
    output.writestr('OEBPS/content.opf', createOpf(args.title, args.author, args.storyid, imageFiles))
    output.writestr('OEBPS/toc.ncx', createNcx(args.title, args.author, args.storyid))
    output.writestr('OEBPS/toc.xhtml', createNav(args.title, len(imageFiles)))
    output.writestr('OEBPS/content.opf', create_opf(args.title, args.author, args.storyid, imageFiles))
    output.writestr('OEBPS/toc.ncx', create_ncx(args.title, args.author, args.storyid))
    output.writestr('OEBPS/toc.xhtml', create_nav(args.title, len(imageFiles)))
    output.writestr('OEBPS/imagestyle.css', IMAGESTYLE_CSS)

    for i, img in enumerate(imageFiles):
    uid = UID_FORMAT.format(i)
    title = 'Page ' + str(i)
    ext = path.splitext(img)[1][1:]
    epubtype = 'bodymatter'
    if i == 0:
    title = 'Cover'
    epubtype = 'cover'
    width, height = imagesize.get(path.join(args.directory, img))
    if ext == 'svg':
    width, height = (-1, -1)
    else:
    width, height = imagesize.get(path.join(args.directory, img))
    print(str(round(i/len(imageFiles)*100)) + '%', 'Processing page ' + str(i+1) + ' of ' + str(len(imageFiles)) + ': ' + img, '(' + str(width) + 'x' + str(height) + ')')
    html = image2xhtml('images/page-' + uid + path.splitext(img)[1], width, height, title, epubtype, 'en')
    html = image2xhtml('images/page-' + uid + '.' + ext, width, height, title, epubtype, 'en')
    output.writestr('OEBPS/page-{uid}.xhtml'.format(uid=uid), html)
    output.write(path.join(args.directory, img), 'OEBPS/images/page-' + uid + path.splitext(img)[1])
    output.write(path.join(args.directory, img), 'OEBPS/images/page-' + uid + '.' + ext)

    output.close()
    zipfile.zlib.Z_DEFAULT_COMPRESSION = prev_compression
  2. @daniel-j daniel-j revised this gist Oct 14, 2018. 2 changed files with 70 additions and 4 deletions.
    38 changes: 37 additions & 1 deletion README.md
    Original file line number Diff line number Diff line change
    @@ -9,7 +9,8 @@ Install dependencies with `pip install imagesize lxml`

    ```
    usage: images2epub.py [-h] [-t TITLE] [-a AUTHOR] [-i STORYID] [-d DIRECTION]
    [-s SUBJECT] [-l LEVEL]
    [-s SUBJECT] [-l LEVEL] [--pagelist PAGELIST]
    [--toclist TOCLIST]
    directory output
    positional arguments:
    @@ -30,8 +31,43 @@ optional arguments:
    Subject of the story. Can be used multiple times.
    -l LEVEL, --level LEVEL
    Compression level [0-9] (default: 9)
    --pagelist PAGELIST Text file with list of images
    --toclist TOCLIST Text file with table of contents
    ```

    ## Example

    `./images2epub.py -t "Sailor Moon #1" -a "Naoko Takeuchi" -s "Magical Girl" -s "Manga" -d rtl images/ sailormoon1.epub`


    ## Advanced usage

    You can specify a pagelist, with one image filename per line. Blank lines are ignored:

    ```
    cover.jpg
    prologue.jpg
    chapter-01-cover.jpg
    chapter_01_page_01.jpg
    chapter_01_page_02.jpg
    chapter_02_intro.jpg
    chapter_02_page_01.jpg
    ...
    ```

    You can specify a table of contents (EPUB metadata) as alternating lines of chapter title and image filename. Blank lines are ignored:

    ```
    Cover
    cover.jpg
    Prologue
    prologue.jpg
    Chapter One
    chapter-01-cover.jpg
    ...
    ```
    36 changes: 33 additions & 3 deletions images2epub.py
    Original file line number Diff line number Diff line change
    @@ -17,6 +17,8 @@
    parser.add_argument('-d', '--direction', help='Reading direction (ltr or rtl, default: ltr)', default='ltr')
    parser.add_argument('-s', '--subject', help='Subject of the story. Can be used multiple times.', action='append', default=[])
    parser.add_argument('-l', '--level', help='Compression level [0-9] (default: 9)', default=9, type=int)
    parser.add_argument('--pagelist', help='Text file with list of images')
    parser.add_argument('--toclist', help='Text file with table of contents')
    parser.add_argument('directory', help='Path to directory with images')
    parser.add_argument('output', help='Output EPUB filename')
    args = parser.parse_args()
    @@ -231,6 +233,27 @@ def createNav(title, pageCount):
    pages[i] = ' <li><a href="page-{uid}.xhtml">{pageNumber}</a></li>'.format(uid=uid, pageNumber=i)
    pages.pop(0)

    toc = [(0, title)]
    if args.toclist:
    toc = []
    title = ""
    img = ""
    with open(args.toclist) as toclist:
    for item in toclist:
    if item.strip():
    if not title:
    title = item.strip()
    else:
    img = item.strip()
    toc.append((imageFiles.index(img), title))
    title = ""
    tochtml = []
    for item in toc:
    (i, name) = item
    print(i, name)
    uid = UID_FORMAT.format(i)
    tochtml.append(' <li epub:type="chapter"><a href="page-{uid}.xhtml">{name}</a></li>'.format(uid=uid, name=escape(name)))

    return '''<?xml version="1.0" encoding="utf-8"?>
    <!DOCTYPE html>
    <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" lang="en">
    @@ -242,7 +265,7 @@ def createNav(title, pageCount):
    <h1>Table of Contents</h1>
    <nav epub:type="toc" id="toc">
    <ol>
    <li epub:type="chapter"><a href="page-000.xhtml">{title}</a></li>
    {toc}
    </ol>
    </nav>
    <nav epub:type="page-list">
    @@ -252,10 +275,17 @@ def createNav(title, pageCount):
    </nav>
    </section>
    </body>
    </html>'''.format(pages='\n'.join(pages), title=escape(title))
    </html>'''.format(pages='\n'.join(pages), toc='\n'.join(tochtml), title=escape(title))


    imageFiles = sorted([f for f in listdir(args.directory) if path.isfile(path.join(args.directory, f))])
    if not args.pagelist:
    imageFiles = sorted([f for f in listdir(args.directory) if path.isfile(path.join(args.directory, f))])
    else:
    imageFiles = []
    with open(args.pagelist) as pagelist:
    for page in pagelist:
    if page.strip():
    imageFiles.append(page.strip())

    imageFiles = list(filter(lambda img: path.splitext(img)[1][1:] in IMAGE_TYPES, imageFiles))

  3. @daniel-j daniel-j revised this gist Jul 21, 2018. 1 changed file with 5 additions and 1 deletion.
    6 changes: 5 additions & 1 deletion images2epub.py
    Original file line number Diff line number Diff line change
    @@ -73,6 +73,7 @@
    'png': 'image/png'
    }


    def image2xhtml(imgfile, width, height, title, epubtype = 'bodymatter', lang = 'en'):
    content = '''<?xml version="1.0" encoding="utf-8"?>
    <!DOCTYPE html>
    @@ -92,6 +93,7 @@ def image2xhtml(imgfile, width, height, title, epubtype = 'bodymatter', lang = '
    epubtype=epubtype, lang=lang)
    return content


    def createOpf(title, author, bookId, imageFiles):
    package_attributes = {'xmlns': NAMESPACES['OPF'],
    'unique-identifier': 'bookId',
    @@ -196,6 +198,7 @@ def createOpf(title, author, bookId, imageFiles):
    tree_str = etree.tostring(root, pretty_print=True, encoding='utf-8', xml_declaration=True)
    return tree_str


    def createNcx(title, author, bookId):
    return '''<?xml version="1.0" encoding="utf-8" standalone="no"?>
    <ncx:ncx xmlns:ncx="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">
    @@ -220,6 +223,7 @@ def createNcx(title, author, bookId):
    </ncx:ncx>
    '''.format(title=escape(title), author=escape(author), bookId=bookId)


    def createNav(title, pageCount):
    pages = [None] * pageCount
    for i, page in enumerate(pages):
    @@ -228,7 +232,6 @@ def createNav(title, pageCount):
    pages.pop(0)

    return '''<?xml version="1.0" encoding="utf-8"?>
    <?xml-model href="http://www.idpf.org/epub/30/schema/epub-nav-30.rnc" type="application/relax-ng-compact-syntax"?>
    <!DOCTYPE html>
    <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" lang="en">
    <head>
    @@ -251,6 +254,7 @@ def createNav(title, pageCount):
    </body>
    </html>'''.format(pages='\n'.join(pages), title=escape(title))


    imageFiles = sorted([f for f in listdir(args.directory) if path.isfile(path.join(args.directory, f))])

    imageFiles = list(filter(lambda img: path.splitext(img)[1][1:] in IMAGE_TYPES, imageFiles))
  4. @daniel-j daniel-j revised this gist Apr 24, 2018. 1 changed file with 11 additions and 13 deletions.
    24 changes: 11 additions & 13 deletions images2epub.py
    Original file line number Diff line number Diff line change
    @@ -15,7 +15,7 @@
    parser.add_argument('-a', '--author', help='Author of the story', default="Unknown Author")
    parser.add_argument('-i', '--storyid', help='Story id (default: random)', default='urn:uuid:' + str(uuid4()))
    parser.add_argument('-d', '--direction', help='Reading direction (ltr or rtl, default: ltr)', default='ltr')
    parser.add_argument('-s', '--subject', help='Subject of the story. Can be used multiple times.', action='append')
    parser.add_argument('-s', '--subject', help='Subject of the story. Can be used multiple times.', action='append', default=[])
    parser.add_argument('-l', '--level', help='Compression level [0-9] (default: 9)', default=9, type=int)
    parser.add_argument('directory', help='Path to directory with images')
    parser.add_argument('output', help='Output EPUB filename')
    @@ -76,7 +76,6 @@
    def image2xhtml(imgfile, width, height, title, epubtype = 'bodymatter', lang = 'en'):
    content = '''<?xml version="1.0" encoding="utf-8"?>
    <!DOCTYPE html>
    <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" lang="{lang}">
    <head>
    <meta name="viewport" content="width={width}, height={height}"/>
    @@ -187,7 +186,7 @@ def createOpf(title, author, bookId, imageFiles):
    for i, img in enumerate(imageFiles):
    uid = UID_FORMAT.format(i)
    props = 'page-spread-left'
    if (i % 2 == 0 and args.direction == 'rtl') or (i % 2 != 0 and args.direction == 'ltr'):
    if (i % 2 == 0 and args.direction == 'ltr') or (i % 2 != 0 and args.direction == 'rtl'):
    props = 'page-spread-right'
    etree.SubElement(spine, 'itemref', {
    'idref': 'page-' + uid,
    @@ -202,7 +201,7 @@ def createNcx(title, author, bookId):
    <ncx:ncx xmlns:ncx="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">
    <ncx:head>
    <ncx:meta name="dtb:uid" content="{bookId}"/>
    <ncx:meta name="dtb:depth" content="-1"/>
    <ncx:meta name="dtb:depth" content="1"/>
    <ncx:meta name="dtb:totalPageCount" content="0"/>
    <ncx:meta name="dtb:maxPageNumber" content="0"/>
    </ncx:head>
    @@ -213,10 +212,8 @@ def createNcx(title, author, bookId):
    <ncx:text>{author}</ncx:text>
    </ncx:docAuthor>
    <ncx:navMap>
    <ncx:navPoint id="p01" playOrder="1">
    <ncx:navLabel>
    <ncx:text>{title}</ncx:text>
    </ncx:navLabel>
    <ncx:navPoint id="p1" playOrder="1">
    <ncx:navLabel><ncx:text>{title}</ncx:text></ncx:navLabel>
    <ncx:content src="page-000.xhtml"/>
    </ncx:navPoint>
    </ncx:navMap>
    @@ -227,9 +224,12 @@ def createNav(title, pageCount):
    pages = [None] * pageCount
    for i, page in enumerate(pages):
    uid = UID_FORMAT.format(i)
    pages[i] = ' <li><a href="page-{uid}.xhtml">{pageNumber}</a></li>'.format(uid=uid, pageNumber=i+1)
    return '''<?xml version="1.0" encoding="UTF-8"?>
    pages[i] = ' <li><a href="page-{uid}.xhtml">{pageNumber}</a></li>'.format(uid=uid, pageNumber=i)
    pages.pop(0)

    return '''<?xml version="1.0" encoding="utf-8"?>
    <?xml-model href="http://www.idpf.org/epub/30/schema/epub-nav-30.rnc" type="application/relax-ng-compact-syntax"?>
    <!DOCTYPE html>
    <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" lang="en">
    <head>
    <title>{title}</title>
    @@ -239,9 +239,7 @@ def createNav(title, pageCount):
    <h1>Table of Contents</h1>
    <nav epub:type="toc" id="toc">
    <ol>
    <li epub:type="chapter" id="toc-01">
    <a href="page-000.xhtml">{title}</a>
    </li>
    <li epub:type="chapter"><a href="page-000.xhtml">{title}</a></li>
    </ol>
    </nav>
    <nav epub:type="page-list">
  5. @daniel-j daniel-j revised this gist Apr 16, 2018. 1 changed file with 2 additions and 0 deletions.
    2 changes: 2 additions & 0 deletions images2epub.py
    Original file line number Diff line number Diff line change
    @@ -116,6 +116,8 @@ def createOpf(title, author, bookId, imageFiles):

    el = etree.SubElement(metadata, '{' + NAMESPACES['DC'] + '}creator', {'id': 'creator'})
    el.text = author
    el = etree.SubElement(metadata, 'meta', {'refines': '#creator', 'property': 'role', 'scheme': 'marc:relators'})
    el.text = 'aut'

    el = etree.SubElement(metadata, '{' + NAMESPACES['DC'] + '}language')
    el.text = 'en'
  6. @daniel-j daniel-j revised this gist Apr 16, 2018. 1 changed file with 11 additions and 0 deletions.
    11 changes: 11 additions & 0 deletions images2epub.py
    Original file line number Diff line number Diff line change
    @@ -37,6 +37,16 @@
    </container>
    '''

    IBOOKS_DISPLAY_OPTIONS_PATH = 'META-INF/com.apple.ibooks.display-options.xml'
    IBOOKS_DISPLAY_OPTIONS_XML = '''<?xml version="1.0" encoding="UTF-8"?>
    <display_options>
    <platform name="*">
    <option name="fixed-layout">true</option>
    <option name="open-to-spread">false</option>
    </platform>
    </display_options>
    '''

    IMAGESTYLE_CSS = '''
    @page {
    padding: 0;
    @@ -257,6 +267,7 @@ def createNav(title, pageCount):
    output = zipfile.ZipFile(args.output, 'w', zipfile.ZIP_DEFLATED)
    output.writestr('mimetype', 'application/epub+zip', compress_type=zipfile.ZIP_STORED)
    output.writestr(CONTAINER_PATH, CONTAINER_XML)
    output.writestr(IBOOKS_DISPLAY_OPTIONS_PATH, IBOOKS_DISPLAY_OPTIONS_XML)
    output.writestr('OEBPS/content.opf', createOpf(args.title, args.author, args.storyid, imageFiles))
    output.writestr('OEBPS/toc.ncx', createNcx(args.title, args.author, args.storyid))
    output.writestr('OEBPS/toc.xhtml', createNav(args.title, len(imageFiles)))
  7. @daniel-j daniel-j revised this gist Apr 16, 2018. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion README.md
    Original file line number Diff line number Diff line change
    @@ -1,6 +1,6 @@
    # images2epub.py

    Converts a directory of images into a modern EPUB3 ebook. Use a tool to extract CBZ/CBR/CBT files and then run this program to generate a nice fixed-layout EPUB ebook of it. You can optionally set the reading direction to right-to-left (e.g. for manga).
    Converts a directory of images into a modern EPUB3 ebook. Use a tool to extract CBZ/CBR/CBT files and then run this program to generate a nice fixed-layout EPUB ebook of it. You can optionally set the reading direction to right-to-left (e.g. for manga). For Kobo ereaders, use the file extension .kepub.epub to get the modern reader and correct reading direction.


    ## Usage
  8. @daniel-j daniel-j revised this gist Apr 16, 2018. 1 changed file with 7 additions and 0 deletions.
    7 changes: 7 additions & 0 deletions README.md
    Original file line number Diff line number Diff line change
    @@ -4,6 +4,9 @@ Converts a directory of images into a modern EPUB3 ebook. Use a tool to extract


    ## Usage

    Install dependencies with `pip install imagesize lxml`

    ```
    usage: images2epub.py [-h] [-t TITLE] [-a AUTHOR] [-i STORYID] [-d DIRECTION]
    [-s SUBJECT] [-l LEVEL]
    @@ -28,3 +31,7 @@ optional arguments:
    -l LEVEL, --level LEVEL
    Compression level [0-9] (default: 9)
    ```

    ## Example

    `./images2epub.py -t "Sailor Moon #1" -a "Naoko Takeuchi" -s "Magical Girl" -s "Manga" -d rtl images/ sailormoon1.epub`
  9. @daniel-j daniel-j revised this gist Apr 16, 2018. 2 changed files with 32 additions and 2 deletions.
    30 changes: 30 additions & 0 deletions README.md
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,30 @@
    # images2epub.py

    Converts a directory of images into a modern EPUB3 ebook. Use a tool to extract CBZ/CBR/CBT files and then run this program to generate a nice fixed-layout EPUB ebook of it. You can optionally set the reading direction to right-to-left (e.g. for manga).


    ## Usage
    ```
    usage: images2epub.py [-h] [-t TITLE] [-a AUTHOR] [-i STORYID] [-d DIRECTION]
    [-s SUBJECT] [-l LEVEL]
    directory output
    positional arguments:
    directory Path to directory with images
    output Output EPUB filename
    optional arguments:
    -h, --help show this help message and exit
    -t TITLE, --title TITLE
    Title of the story
    -a AUTHOR, --author AUTHOR
    Author of the story
    -i STORYID, --storyid STORYID
    Story id (default: random)
    -d DIRECTION, --direction DIRECTION
    Reading direction (ltr or rtl, default: ltr)
    -s SUBJECT, --subject SUBJECT
    Subject of the story. Can be used multiple times.
    -l LEVEL, --level LEVEL
    Compression level [0-9] (default: 9)
    ```
    4 changes: 2 additions & 2 deletions images2epub.py
    Original file line number Diff line number Diff line change
    @@ -13,9 +13,9 @@
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--title', help='Title of the story', default="Unknown Title")
    parser.add_argument('-a', '--author', help='Author of the story', default="Unknown Author")
    parser.add_argument('-s', '--subject', help='Subject of the story. Can be used multiple times.', action='append')
    parser.add_argument('-d', '--direction', help='Reading direction (ltr or rtl, default: ltr)', default='ltr')
    parser.add_argument('-i', '--storyid', help='Story id (default: random)', default='urn:uuid:' + str(uuid4()))
    parser.add_argument('-d', '--direction', help='Reading direction (ltr or rtl, default: ltr)', default='ltr')
    parser.add_argument('-s', '--subject', help='Subject of the story. Can be used multiple times.', action='append')
    parser.add_argument('-l', '--level', help='Compression level [0-9] (default: 9)', default=9, type=int)
    parser.add_argument('directory', help='Path to directory with images')
    parser.add_argument('output', help='Output EPUB filename')
  10. @daniel-j daniel-j revised this gist Apr 16, 2018. 1 changed file with 9 additions and 3 deletions.
    12 changes: 9 additions & 3 deletions images2epub.py
    100644 → 100755
    Original file line number Diff line number Diff line change
    @@ -15,7 +15,8 @@
    parser.add_argument('-a', '--author', help='Author of the story', default="Unknown Author")
    parser.add_argument('-s', '--subject', help='Subject of the story. Can be used multiple times.', action='append')
    parser.add_argument('-d', '--direction', help='Reading direction (ltr or rtl, default: ltr)', default='ltr')
    parser.add_argument('--id', help='Story id (default: random)', default='urn:uuid:' + str(uuid4()))
    parser.add_argument('-i', '--storyid', help='Story id (default: random)', default='urn:uuid:' + str(uuid4()))
    parser.add_argument('-l', '--level', help='Compression level [0-9] (default: 9)', default=9, type=int)
    parser.add_argument('directory', help='Path to directory with images')
    parser.add_argument('output', help='Output EPUB filename')
    args = parser.parse_args()
    @@ -250,11 +251,14 @@ def createNav(title, pageCount):

    print('Found ' + str(len(imageFiles)) + ' pages.')

    prev_compression = zipfile.zlib.Z_DEFAULT_COMPRESSION
    zipfile.zlib.Z_DEFAULT_COMPRESSION = args.level

    output = zipfile.ZipFile(args.output, 'w', zipfile.ZIP_DEFLATED)
    output.writestr('mimetype', 'application/epub+zip', compress_type=zipfile.ZIP_STORED)
    output.writestr(CONTAINER_PATH, CONTAINER_XML)
    output.writestr('OEBPS/content.opf', createOpf(args.title, args.author, args.id, imageFiles))
    output.writestr('OEBPS/toc.ncx', createNcx(args.title, args.author, args.id))
    output.writestr('OEBPS/content.opf', createOpf(args.title, args.author, args.storyid, imageFiles))
    output.writestr('OEBPS/toc.ncx', createNcx(args.title, args.author, args.storyid))
    output.writestr('OEBPS/toc.xhtml', createNav(args.title, len(imageFiles)))
    output.writestr('OEBPS/imagestyle.css', IMAGESTYLE_CSS)

    @@ -272,4 +276,6 @@ def createNav(title, pageCount):
    output.write(path.join(args.directory, img), 'OEBPS/images/page-' + uid + path.splitext(img)[1])

    output.close()
    zipfile.zlib.Z_DEFAULT_COMPRESSION = prev_compression

    print('Complete! Saved EPUB as ' + args.output)
  11. @daniel-j daniel-j created this gist Apr 16, 2018.
    275 changes: 275 additions & 0 deletions images2epub.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,275 @@
    #!/usr/bin/env python3

    import sys
    from os import listdir, path
    from lxml import etree
    from html import escape
    from uuid import uuid4
    import argparse
    import datetime
    import zipfile
    import imagesize

    # Command-line interface. The parsed ``args`` namespace is a module-level
    # global that createOpf() and the main script read directly.
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--title', help='Title of the story', default="Unknown Title")
    parser.add_argument('-a', '--author', help='Author of the story', default="Unknown Author")
    # default=[] keeps args.subject iterable when -s is never given; without it
    # the 'append' action leaves the attribute as None and iterating it fails.
    parser.add_argument('-s', '--subject', help='Subject of the story. Can be used multiple times.', action='append', default=[])
    parser.add_argument('-d', '--direction', help='Reading direction (ltr or rtl, default: ltr)', default='ltr')
    parser.add_argument('--id', help='Story id (default: random)', default='urn:uuid:' + str(uuid4()))
    parser.add_argument('directory', help='Path to directory with images')
    parser.add_argument('output', help='Output EPUB filename')
    args = parser.parse_args()

    # Anything other than an explicit 'rtl' falls back to left-to-right.
    if args.direction != 'rtl':
        args.direction = 'ltr'

    # Zero-padded three-digit counter used to build per-page ids and file names
    # (e.g. 'page-000.xhtml', 'img-000').
    UID_FORMAT = '{:03d}'
    # XML namespaces used when building the OPF package document.
    NAMESPACES = {'OPF': 'http://www.idpf.org/2007/opf',
    'DC': 'http://purl.org/dc/elements/1.1/'}

    # Archive path and content of the container file; it points readers at
    # OEBPS/content.opf as the package document.
    CONTAINER_PATH = 'META-INF/container.xml'
    CONTAINER_XML = '''<?xml version='1.0' encoding='utf-8'?>
    <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
    <rootfiles>
    <rootfile media-type="application/oebps-package+xml" full-path="OEBPS/content.opf"/>
    </rootfiles>
    </container>
    '''

    # Stylesheet written to OEBPS/imagestyle.css and linked from every page;
    # it removes margins/padding and stretches the '#image' element to the page.
    IMAGESTYLE_CSS = '''
    @page {
    padding: 0;
    margin: 0;
    }
    html,
    body {
    padding: 0;
    margin: 0;
    height: 100%;
    }
    #image {
    width: 100%;
    height: 100%;
    display: block;
    margin: 0;
    padding: 0;
    }
    '''

    # Supported image file extensions (without the dot, matched case-sensitively)
    # mapped to the media type declared for them in the manifest.
    IMAGE_TYPES = {
    'jpeg': 'image/jpeg',
    'jpg': 'image/jpeg',
    'png': 'image/png'
    }

    def image2xhtml(imgfile, width, height, title, epubtype='bodymatter', lang='en'):
        """Return the XHTML page that displays a single image.

        The image is wrapped in an inline SVG whose viewBox equals the image
        size, and the same size is declared in the viewport meta tag.

        Args:
            imgfile: Path of the image, used as the ``xlink:href`` value.
            width: Image width, used for the viewport and the SVG viewBox.
            height: Image height, used for the viewport and the SVG viewBox.
            title: Text placed in the page's ``<title>`` element.
            epubtype: Value of the ``epub:type`` attribute on ``<body>``.
            lang: Value of the ``lang`` attribute on ``<html>``.

        Returns:
            The page markup as a string.
        """
        # Every caller-supplied string is escaped, including the two that end
        # up inside attributes, so quotes or '&' cannot break the markup.
        return '''<?xml version="1.0" encoding="utf-8"?>
    <!DOCTYPE html>
    <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" lang="{lang}">
    <head>
    <meta name="viewport" content="width={width}, height={height}"/>
    <title>{title}</title>
    <link rel="stylesheet" type="text/css" href="imagestyle.css"/>
    </head>
    <body epub:type="{epubtype}">
    <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" id="image" version="1.1" viewBox="0 0 {width} {height}"><image width="{width}" height="{height}" xlink:href="{filename}"/></svg>
    </body>
    </html>
    '''.format(
            width=width,
            height=height,
            filename=escape(imgfile),
            title=escape(title),
            epubtype=escape(epubtype),
            lang=escape(lang),
        )

    def createOpf(title, author, bookId, imageFiles):
        """Build the OPF package document (content.opf) for the book.

        Besides its parameters, this reads the module-level ``args`` namespace
        for the reading direction, the subject list and the image directory.

        Args:
            title: Book title, stored as ``dc:title``.
            author: Author name, stored as ``dc:creator``.
            bookId: Unique identifier, stored as ``dc:identifier``.
            imageFiles: Image file names in page order; the first is the cover.

        Returns:
            The serialized package document as produced by ``etree.tostring``
            (UTF-8 encoded, pretty-printed, with an XML declaration).
        """
        dc = '{' + NAMESPACES['DC'] + '}'
        package_attributes = {'xmlns': NAMESPACES['OPF'],
                              'unique-identifier': 'bookId',
                              'version': '3.0',
                              'prefix': 'rendition: http://www.idpf.org/vocab/rendition/#',
                              'dir': args.direction}
        nsmap = {'dc': NAMESPACES['DC'], 'opf': NAMESPACES['OPF']}

        root = etree.Element('package', package_attributes)

        # metadata
        metadata = etree.SubElement(root, 'metadata', nsmap=nsmap)
        el = etree.SubElement(metadata, 'meta', {'property': 'dcterms:modified'})
        # The trailing 'Z' marks the value as UTC, so take the time in UTC
        # rather than local time.
        el.text = datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')

        el = etree.SubElement(metadata, dc + 'identifier', {'id': 'bookId'})
        el.text = bookId

        el = etree.SubElement(metadata, dc + 'title')
        el.text = title

        el = etree.SubElement(metadata, dc + 'creator', {'id': 'creator'})
        el.text = author

        el = etree.SubElement(metadata, dc + 'language')
        el.text = 'en'

        # args.subject is None when no -s option was given; treat that as empty.
        for subject in args.subject or []:
            el = etree.SubElement(metadata, dc + 'subject')
            el.text = subject

        etree.SubElement(metadata, 'meta', {'name': 'cover', 'content': 'img-' + UID_FORMAT.format(0)})

        el = etree.SubElement(metadata, 'meta', {'property': 'rendition:layout'})
        el.text = 'pre-paginated'
        el = etree.SubElement(metadata, 'meta', {'property': 'rendition:orientation'})
        el.text = 'portrait'
        el = etree.SubElement(metadata, 'meta', {'property': 'rendition:spread'})
        el.text = 'landscape'

        # The first image's size is recorded as the book's original resolution.
        width, height = imagesize.get(path.join(args.directory, imageFiles[0]))
        etree.SubElement(metadata, 'meta', {'name': 'original-resolution', 'content': str(width) + 'x' + str(height)})

        # manifest
        manifest = etree.SubElement(root, 'manifest')

        etree.SubElement(manifest, 'item', {
            'href': 'imagestyle.css',
            'id': 'imagestyle',
            'media-type': 'text/css'
        })

        for i, img in enumerate(imageFiles):
            uid = UID_FORMAT.format(i)
            ext = path.splitext(img)[1]  # includes the leading dot

            imgattrs = {
                'href': 'images/page-' + uid + ext,
                'id': 'img-' + uid,
                'media-type': IMAGE_TYPES[ext[1:]],
            }
            if i == 0:
                imgattrs['properties'] = 'cover-image'
            etree.SubElement(manifest, 'item', imgattrs)

            etree.SubElement(manifest, 'item', {
                'href': 'page-' + uid + '.xhtml',
                'id': 'page-' + uid,
                'media-type': 'application/xhtml+xml',
                'properties': 'svg'
            })

        etree.SubElement(manifest, 'item', {
            'href': 'toc.ncx',
            'id': 'ncxtoc',
            'media-type': 'application/x-dtbncx+xml',
        })
        etree.SubElement(manifest, 'item', {
            'href': 'toc.xhtml',
            'id': 'toc',
            'media-type': 'application/xhtml+xml',
            'properties': 'nav'
        })

        # spine
        spine = etree.SubElement(root, 'spine', {
            'toc': 'ncxtoc',
            'page-progression-direction': args.direction
        })

        for i, _ in enumerate(imageFiles):
            uid = UID_FORMAT.format(i)
            # The cover (index 0) goes on the right-hand side of a spread for
            # left-to-right books and on the left-hand side for right-to-left
            # books; subsequent pages alternate.
            props = 'page-spread-left'
            if (i % 2 == 0 and args.direction == 'ltr') or (i % 2 != 0 and args.direction == 'rtl'):
                props = 'page-spread-right'
            etree.SubElement(spine, 'itemref', {
                'idref': 'page-' + uid,
                'properties': props
            })

        return etree.tostring(root, pretty_print=True, encoding='utf-8', xml_declaration=True)

    def createNcx(title, author, bookId):
        """Return the NCX table of contents (toc.ncx) as a string.

        The navigation map has a single entry, labelled with the book title,
        that points at the first page (page-000.xhtml).

        Args:
            title: Book title, used for docTitle and the single nav label.
            author: Author name, used for docAuthor.
            bookId: Unique identifier, written to the ``dtb:uid`` meta element.

        Returns:
            The NCX document markup.
        """
        # bookId lands inside an attribute value, so it is escaped like the
        # other fields. dtb:depth is 1 because the navMap is one level deep.
        return '''<?xml version="1.0" encoding="utf-8" standalone="no"?>
    <ncx:ncx xmlns:ncx="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">
    <ncx:head>
    <ncx:meta name="dtb:uid" content="{bookId}"/>
    <ncx:meta name="dtb:depth" content="1"/>
    <ncx:meta name="dtb:totalPageCount" content="0"/>
    <ncx:meta name="dtb:maxPageNumber" content="0"/>
    </ncx:head>
    <ncx:docTitle>
    <ncx:text>{title}</ncx:text>
    </ncx:docTitle>
    <ncx:docAuthor>
    <ncx:text>{author}</ncx:text>
    </ncx:docAuthor>
    <ncx:navMap>
    <ncx:navPoint id="p01" playOrder="1">
    <ncx:navLabel>
    <ncx:text>{title}</ncx:text>
    </ncx:navLabel>
    <ncx:content src="page-000.xhtml"/>
    </ncx:navPoint>
    </ncx:navMap>
    </ncx:ncx>
    '''.format(title=escape(title), author=escape(author), bookId=escape(bookId))

    def createNav(title, pageCount):
        """Return the EPUB navigation document (toc.xhtml) as a string.

        The table of contents holds one entry, labelled with the book title,
        that links to page-000.xhtml. The page list holds one link per page,
        labelled 1 through ``pageCount``.

        Args:
            title: Book title; escaped before being inserted into the markup.
            pageCount: Number of pages to list.

        Returns:
            The navigation document markup.
        """
        page_items = '\n'.join(
            ' <li><a href="page-{uid}.xhtml">{pageNumber}</a></li>'.format(
                uid=UID_FORMAT.format(index), pageNumber=index + 1)
            for index in range(pageCount)
        )
        safe_title = escape(title)
        return '''<?xml version="1.0" encoding="UTF-8"?>
    <?xml-model href="http://www.idpf.org/epub/30/schema/epub-nav-30.rnc" type="application/relax-ng-compact-syntax"?>
    <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" lang="en">
    <head>
    <title>{title}</title>
    </head>
    <body>
    <section class="frontmatter" epub:type="frontmatter toc">
    <h1>Table of Contents</h1>
    <nav epub:type="toc" id="toc">
    <ol>
    <li epub:type="chapter" id="toc-01">
    <a href="page-000.xhtml">{title}</a>
    </li>
    </ol>
    </nav>
    <nav epub:type="page-list">
    <ol>
    {pages}
    </ol>
    </nav>
    </section>
    </body>
    </html>'''.format(pages=page_items, title=safe_title)

    # Collect the regular files in the input directory, in sorted name order,
    # keeping only those whose extension is a supported image type.
    imageFiles = sorted(
        f for f in listdir(args.directory)
        if path.isfile(path.join(args.directory, f))
        and path.splitext(f)[1][1:] in IMAGE_TYPES
    )

    if not imageFiles:
        print('Too few images:', len(imageFiles))
        sys.exit(1)

    pageTotal = len(imageFiles)
    print('Found ' + str(pageTotal) + ' pages.')

    # The context manager closes (and thereby finalises) the archive even when
    # reading or writing one of the pages raises.
    with zipfile.ZipFile(args.output, 'w', zipfile.ZIP_DEFLATED) as output:
        # 'mimetype' is written first and stored uncompressed, as the EPUB
        # container format requires.
        output.writestr('mimetype', 'application/epub+zip', compress_type=zipfile.ZIP_STORED)
        output.writestr(CONTAINER_PATH, CONTAINER_XML)
        output.writestr('OEBPS/content.opf', createOpf(args.title, args.author, args.id, imageFiles))
        output.writestr('OEBPS/toc.ncx', createNcx(args.title, args.author, args.id))
        output.writestr('OEBPS/toc.xhtml', createNav(args.title, pageTotal))
        output.writestr('OEBPS/imagestyle.css', IMAGESTYLE_CSS)

        for i, img in enumerate(imageFiles):
            uid = UID_FORMAT.format(i)
            # The first image is the cover; the rest are numbered body pages.
            if i == 0:
                title = 'Cover'
                epubtype = 'cover'
            else:
                title = 'Page ' + str(i)
                epubtype = 'bodymatter'

            source = path.join(args.directory, img)
            # Images are renamed to page-NNN plus their original extension.
            target = 'images/page-' + uid + path.splitext(img)[1]
            width, height = imagesize.get(source)
            print(str(round(i / pageTotal * 100)) + '%',
                  'Processing page ' + str(i + 1) + ' of ' + str(pageTotal) + ': ' + img,
                  '(' + str(width) + 'x' + str(height) + ')')
            html = image2xhtml(target, width, height, title, epubtype, 'en')
            output.writestr('OEBPS/page-{uid}.xhtml'.format(uid=uid), html)
            output.write(source, 'OEBPS/' + target)

    print('Complete! Saved EPUB as ' + args.output)