Skip to content

Instantly share code, notes, and snippets.

@Jemeni11
Last active May 2, 2023 15:18
Show Gist options
  • Save Jemeni11/b297e899244bf974c03df06499cfd2d4 to your computer and use it in GitHub Desktop.
Save Jemeni11/b297e899244bf974c03df06499cfd2d4 to your computer and use it in GitHub Desktop.
This code is meant to edit an ePub: for every p tag containing an "[img: <a href='...'>..</a>]" string, it grabs the href attribute, downloads the image, adds it to the book, and writes a new ePub. THIS DOES NOT WORK.
import sys
import ebooklib
from ebooklib import epub
from bs4 import BeautifulSoup
from image import get_image_from_url
def _embed_image(new_book, soup, item, paragraph, index: int) -> bool:
    """
    Download the image linked from one ``[img: ...]`` paragraph and embed it.

    :param new_book: The book the downloaded image is added to.
    :param soup: The parsed document that owns ``paragraph``.
    :param item: The document item being processed; its ``file_name`` is used
        to build the image's uid, path and alt text.
    :param paragraph: The ``<p>`` tag whose ``<a href>`` holds the image URL.
    :param index: Position of ``paragraph`` among the document's image paragraphs.
    :return: True if the paragraph was replaced by an ``<img>`` tag, else False.
    """
    anchor = paragraph.a
    image_link = anchor.get('href') if anchor is not None else None
    # Fetch exactly once and reuse the result; a second call would download
    # the same image again.
    image_data = get_image_from_url(image_link) if image_link else None
    if not isinstance(image_data, tuple):
        print(f"Error with image {index}, skipping ...")
        return False

    image_content, image_extension, image_media_type = image_data
    image_path = f"images/{item.file_name}_{index}.{image_extension}"
    new_book.add_item(
        epub.EpubItem(
            uid=f"{item.file_name}_{index}",
            file_name=image_path,
            media_type=image_media_type,
            content=image_content,
        )
    )
    # Build a real element. Passing the markup as a plain string to
    # replace_with() inserts it as (escaped) text, not as an <img> tag.
    img_tag = soup.new_tag(
        "img",
        attrs={
            "alt": f"Image {index} from {item.file_name}",
            "class": "img_center",
            "src": image_path,
        },
    )
    paragraph.replace_with(img_tag)
    return True


def main(path_to_epub: str) -> None:
    """
    This function updates the FicHub epub file with images.

    Every ``<p>`` whose text contains ``[img:`` is treated as an image
    placeholder: the ``href`` of its ``<a>`` is downloaded through
    ``get_image_from_url``, the image is stored in the new book under
    ``images/`` and the paragraph is replaced by an ``<img>`` element. The
    result is written to ``<input file stem>new.epub`` in the current
    working directory.

    :param path_to_epub: The path to the FicHub epub file.
    :return: None
    """
    # Local import: only this function needs pathlib.
    from pathlib import Path

    try:
        book = epub.read_epub(path_to_epub)
    except FileNotFoundError:
        print(f'File {path_to_epub} not found.')
        return
    print(f'Opened {path_to_epub}')

    new_book = epub.EpubBook()
    new_book.set_unique_metadata('DC', 'title', book.get_metadata('DC', 'title')[0][0])
    new_book.set_unique_metadata('DC', 'creator', book.get_metadata('DC', 'creator')[0][0])
    # NOTE: only title and creator metadata are carried over; cover metadata is not.

    for item in book.get_items():
        if item.content is None:
            print("NoneType, Skipping")
        else:
            new_book.add_item(item)

    # Path.stem copes with other path separators and with dots inside the name.
    file_name = Path(path_to_epub).stem
    output_name = f"{file_name}new.epub"

    # Snapshot the documents first: image items are added to the book while
    # we loop, and get_items_of_type() would otherwise walk a growing list.
    documents = list(new_book.get_items_of_type(ebooklib.ITEM_DOCUMENT))
    for item in documents:
        try:
            soup = BeautifulSoup(item.content, "lxml-xml")
        except TypeError:
            print("NoneType error, skipping ...")
            continue

        # Image placeholders look like:
        #   <p>[img: <a href="https://i.imgur.com/ABCDEF.jpg"
        #            rel="noopener noreferrer">data:image/gif;base64,R0lGODlhA</a>]</p>
        # and the link we want is the anchor's href attribute.
        images = [p for p in soup.find_all('p') if '[img:' in p.text]
        print(f'Found {len(images)} images in {item.file_name}')

        embedded = 0
        # enumerate() gives every paragraph a distinct index even when two
        # paragraphs are identical (list.index() would return the first match).
        for index, image in enumerate(images):
            # Guard each image separately so one failure does not discard
            # the images already embedded in this document.
            try:
                if _embed_image(new_book, soup, item, image, index):
                    embedded += 1
            except Exception as e:
                print(f'Error while parsing images: {e}')

        if embedded:
            # Store bytes, not str: the serialized document starts with an XML
            # encoding declaration, which lxml refuses to parse from a str.
            item.content = str(soup).encode("utf-8")

    try:
        new_book.toc = book.toc
        # Without a spine the written book has no reading order.
        new_book.spine = book.spine
        # The source book's own NCX/Nav items were copied above; only add
        # fresh ones when the source had none, to avoid duplicate entries.
        if not any(isinstance(i, epub.EpubNcx) for i in new_book.get_items()):
            new_book.add_item(epub.EpubNcx())
        if not any(isinstance(i, epub.EpubNav) for i in new_book.get_items()):
            new_book.add_item(epub.EpubNav())
        epub.write_epub(output_name, new_book)
        print(f'Wrote {output_name}')
    except Exception as e:
        print(f'Error while writing epub: {e}')
if __name__ == '__main__':
    # Script entry point: the first command-line argument is the epub path.
    # Exit with a usage message instead of an IndexError traceback when it
    # is missing; extra arguments are still ignored.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <path-to-epub>")
    main(sys.argv[1])
import logging
import math
from base64 import b64decode
from io import BytesIO
from typing import Optional, Tuple

import PIL
import requests
from PIL import Image
logger = logging.getLogger(__name__)
def get_image_from_url(
        url: str,
        image_format: str = "JPEG",
        compress_images: bool = False,
        max_image_size: int = 1_000_000
) -> Optional[Tuple[bytes, str, str]]:
    """
    Load an image from an http(s) URL or a base64 ``data:image`` URI.

    GIFs are returned as GIFs. Base64 images in jpg/jpeg/png/gif keep their
    own format; every other image is re-encoded to ``image_format``.

    :param url: The url of the image.
    :param image_format: The format to convert the image to.
    :param compress_images: Whether to compress the image or not.
    :param max_image_size: The maximum size of the image in bytes.
    :return: A tuple of the image data, the image format (usable as a file
        extension) and the image mime type, or None when the image could not
        be fetched or processed (the error is logged).
    """
    try:
        if url.startswith("https://www.filepicker.io/api/"):
            logger.warning("Filepicker.io image detected, converting to Fiction.live image. This might fail.")
            url = f"https://cdn3.fiction.live/fp/{url.split('/')[-1]}?&quality=95"
        elif url.startswith("data:image") and 'base64' in url:
            logger.info("Base64 image detected")
            # Split on the first comma only: everything after it is payload.
            head, base64data = url.split(',', 1)
            file_ext = str(head.split(';')[0].split('/')[1])
            imgdata = b64decode(base64data)
            if compress_images:
                if file_ext.lower() == "gif":
                    logger.info("GIF images should not be compressed, skipping compression")
                else:
                    compressed_base64_image = compress_image(BytesIO(imgdata), max_image_size, file_ext)
                    imgdata = PIL_Image_to_bytes(compressed_base64_image, file_ext)
            if file_ext.lower() not in ["jpg", "jpeg", "png", "gif"]:
                logger.info(f"Image format {file_ext} not supported, converting to {image_format}")
                return (
                    # Hand over a stream, not raw bytes: the result is read
                    # with .read() and PIL would treat bytes as a file path.
                    _convert_to_new_format(BytesIO(imgdata), image_format).read(),
                    image_format.lower(),
                    f"image/{image_format.lower()}"
                )
            # "image/jpg" is not a registered media type; the extension is kept as-is.
            mime_subtype = "jpeg" if file_ext.lower() == "jpg" else file_ext
            return imgdata, file_ext, f"image/{mime_subtype}"

        logger.info(f"Downloading image: {url}")
        # A timeout keeps a stalled server from hanging the run, and
        # raise_for_status() stops an HTTP error page being parsed as an image.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        image = BytesIO(response.content)
        PIL_image = Image.open(image)
        img_format = str(PIL_image.format)

        if img_format.lower() == "gif":
            # .get(): the 'version' key is not guaranteed to be present.
            if PIL_image.info.get('version') not in [b"GIF89a", "GIF89a"]:
                PIL_image.info['version'] = b"GIF89a"
            return PIL_Image_to_bytes(PIL_image, "GIF"), "gif", "image/gif"

        if compress_images:
            PIL_image = compress_image(image, max_image_size, img_format)

        # Lower-case the format so the extension matches the base64 branch.
        return (
            PIL_Image_to_bytes(PIL_image, image_format),
            image_format.lower(),
            f"image/{image_format.lower()}"
        )
    except Exception as e:
        # Best effort: callers check for a tuple and skip the image otherwise.
        logger.info("Encountered an error downloading image: " + str(e))
        return None
def compress_image(image: BytesIO, target_size: int, image_format: str) -> PIL.Image.Image:
    """
    Downscale an image whose encoded size exceeds ``target_size``.

    The pixel budget is ``2.8114 * target_size``, i.e. each byte of the size
    budget is treated as worth about 2.8114 pixels (heuristic constant; its
    origin is not documented in this file).

    :param image: The encoded image data.
    :param target_size: The maximum size of the image in bytes.
    :param image_format: The format used to measure the re-encoded size for logging.
    :return: The image converted to RGBA, resized when it was over budget.
    """
    encoded_size = len(image.getvalue())
    logger.info(f"Image size: {get_size_format(encoded_size)}")

    big_photo = Image.open(image).convert("RGBA")

    if encoded_size <= target_size:
        logger.info(f"Image is less than {get_size_format(target_size)}, not compressing")
        return big_photo

    logger.info(f"Image is greater than {get_size_format(target_size)}, compressing")
    target_pixel_count = 2.8114 * target_size
    pixel_ratio = target_pixel_count / math.prod(big_photo.size)
    if pixel_ratio < 1:
        # pixel_ratio is a ratio of AREAS, so each side scales by its square
        # root; applying the ratio to both sides would shrink the area by
        # pixel_ratio ** 2.
        scale_factor = math.sqrt(pixel_ratio)
        # Never let a side collapse to 0 pixels, which resize() rejects.
        x, y = (max(1, int(scale_factor * dim)) for dim in big_photo.size)
        logger.info(f"Resizing image dimensions from {big_photo.size} to ({x}, {y})")
        sml_photo = big_photo.resize((x, y), resample=Image.LANCZOS)
    else:
        # Already within the pixel budget: keep the original dimensions.
        sml_photo = big_photo
    compressed_image_size = get_size_format(len(PIL_Image_to_bytes(sml_photo, image_format)))
    logger.info(f"Compressed image size: {compressed_image_size}")
    return sml_photo
def PIL_Image_to_bytes(
        pil_image: PIL.Image.Image,
        image_format: str
) -> bytes:
    """
    Encode a PIL image into ``image_format`` and return the encoded bytes.

    GIFs are written with all their frames, JPEGs are flattened onto a white
    background first, and every other format is saved directly.

    :param pil_image: The image to encode.
    :param image_format: The target format name; ``jpg`` is accepted as an
        alias for ``JPEG``.
    :return: The encoded image data.
    """
    out_io = BytesIO()
    format_key = image_format.lower()
    # Pillow registers the encoder as "JPEG" only; passing "jpg" to save()
    # fails even though ".jpg" is a known file extension.
    save_format = "JPEG" if format_key == "jpg" else image_format

    if format_key.startswith("gif"):
        # Walk the frames, compositing each one over the accumulated result
        # so every stored frame is a complete picture.
        frames = [pil_image.convert('RGBA')]
        try:
            while True:
                pil_image.seek(pil_image.tell() + 1)
                frames.append(Image.alpha_composite(frames[-1], pil_image.convert('RGBA')))
        except EOFError:
            # seek() past the last frame signals the end of the animation.
            pass
        frames[0].save(out_io, format=save_format, save_all=True, append_images=frames[1:], optimize=True, loop=0)
        return out_io.getvalue()

    if format_key in ("jpeg", "jpg"):
        # Paste the image onto a white canvas, using the image itself as the
        # paste mask, then drop to RGB for the JPEG encoder.
        rgba_image = pil_image.convert("RGBA")
        background_img = Image.new('RGBA', pil_image.size, "white")
        background_img.paste(rgba_image, (0, 0), rgba_image)
        pil_image = background_img.convert('RGB')

    pil_image.save(out_io, format=save_format, optimize=True, quality=95)
    return out_io.getvalue()
def get_size_format(b, factor=1000, suffix="B"):
    """
    Render a byte count as a human-readable string with two decimals.

    The value is divided by ``factor`` until it drops below ``factor`` and
    is then tagged with the matching unit prefix, e.g. with the default
    factor of 1000:
        1253656 => '1.25MB'
        1253656678 => '1.25GB'
    and with ``factor=1024``:
        1253656 => '1.20MB'
        1253656678 => '1.17GB'
    :param b: The size in bytes.
    :param factor: The factor to divide by.
    :param suffix: The suffix to add to the end.
    :return: The formatted size; anything beyond the ``Z`` prefix is
        reported with ``Y``.
    """
    remaining = b
    for prefix in ("", "K", "M", "G", "T", "P", "E", "Z"):
        if not remaining < factor:
            # Still too large for this unit: move up one step.
            remaining = remaining / factor
            continue
        return f"{remaining:.2f}{prefix}{suffix}"
    return f"{remaining:.2f}Y{suffix}"
def _convert_to_new_format(image_bytestream, image_format: str):
    """
    Re-encode an image into ``image_format``.

    :param image_bytestream: The encoded image, as a binary stream or raw bytes.
    :param image_format: The format to convert the image to.
    :return: A stream positioned at 0 holding the converted image, named
        ``cover.<format>``. If the conversion fails, the error is logged and
        the original data is returned as a rewound stream instead.
    """
    # Wrap raw bytes so PIL reads them as image data (it would otherwise
    # treat them as a file path) and so the result always supports .read().
    if isinstance(image_bytestream, (bytes, bytearray)):
        image_bytestream = BytesIO(image_bytestream)

    new_image = BytesIO()
    try:
        source_image = Image.open(image_bytestream)
        # The JPEG encoder cannot store alpha or palette modes.
        if image_format.upper() in ("JPEG", "JPG") and source_image.mode not in ("L", "RGB", "CMYK"):
            source_image = source_image.convert("RGB")
        source_image.save(new_image, format=image_format.upper())
        new_image.name = f'cover.{image_format.lower()}'
        new_image.seek(0)
        return new_image
    except Exception as e:
        logger.info(f"Encountered an error converting image to {image_format}\nError: {e}")
        # Rewind so a following .read() yields the full original data rather
        # than whatever was left after the failed attempt.
        if hasattr(image_bytestream, "seek"):
            image_bytestream.seek(0)
        return image_bytestream
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment