Last active
May 2, 2023 15:18
-
-
Save Jemeni11/b297e899244bf974c03df06499cfd2d4 to your computer and use it in GitHub Desktop.
This code is meant to edit an ePub: for each "[img: <a href='...'>..</a>]" placeholder inside a p tag, it reads the href attribute, downloads the image, adds it to the book, and writes a new ePub. THIS DOES NOT WORK.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import sys | |
| import ebooklib | |
| from ebooklib import epub | |
| from bs4 import BeautifulSoup | |
| from image import get_image_from_url | |
def _embed_images(item, new_book) -> None:
    """
    Swap every "[img: <a href=...>]" placeholder paragraph of one document for a real image.

    :param item: The epub document whose ``content`` is parsed and, if needed, rewritten.
    :param new_book: The book that receives one extra image item per downloaded image.
    :return: None
    """
    soup = BeautifulSoup(item.content, "lxml-xml")
    placeholders = [p for p in soup.find_all('p') if '[img:' in p.text]
    print(f'Found {len(placeholders)} images in {item.file_name}')

    replaced_any = False
    # enumerate() gives every placeholder its own index; looking the index up with
    # list.index() would return the first match for two identical placeholders and
    # make their uids / file names collide.
    for index, paragraph in enumerate(placeholders):
        # Placeholders look like:
        #   <p>[img: <a href="https://i.imgur.com/ABCDEF.jpg" rel="noopener noreferrer">...</a>]</p>
        # and the real image location is the anchor's href attribute.
        anchor = paragraph.find('a', href=True)
        # Download once; get_image_from_url yields a 3-tuple on success and
        # something else (None) on failure.
        image_data = get_image_from_url(anchor['href']) if anchor is not None else None
        if not isinstance(image_data, tuple):
            print(f"Error with image {index}, skipping ...")
            continue

        image_content, image_extension, image_media_type = image_data
        image_path = f"images/{item.file_name}_{index}.{image_extension}"
        new_book.add_item(
            epub.EpubItem(
                uid=f"{item.file_name}_{index}",
                file_name=image_path,
                media_type=image_media_type,
                content=image_content,
            )
        )

        # Build a real element: handing a plain string to replace_with() inserts
        # escaped text, not markup.
        img_tag = soup.new_tag('img')
        img_tag['alt'] = f"Image {index} from {item.file_name}"
        img_tag['class'] = 'img_center'
        img_tag['src'] = image_path
        # Keep the surrounding <p> so the image stays inside a block element.
        paragraph.clear()
        paragraph.append(img_tag)
        replaced_any = True

    if replaced_any:
        # Store bytes rather than str: the serialised document starts with an XML
        # declaration, and lxml rejects str input that carries an encoding declaration.
        item.content = str(soup).encode('utf-8')


def main(path_to_epub: str) -> None:
    """
    This function updates the FicHub epub file with images.

    The book is copied into a fresh ``EpubBook``, every "[img: ...]" placeholder is
    replaced by the downloaded image, and the result is written to
    ``<input name>new.epub`` in the current working directory.

    :param path_to_epub: The path to the FicHub epub file.
    :return: None
    """
    # Local import keeps the block self-contained without touching the file's imports.
    from pathlib import Path

    try:
        book = epub.read_epub(path_to_epub)
    except FileNotFoundError:
        print(f'File {path_to_epub} not found.')
        return
    print(f'Opened {path_to_epub}')

    new_book = epub.EpubBook()
    # Copy only the metadata that is actually present; indexing an empty result
    # would raise IndexError.
    for field in ('title', 'creator'):
        values = book.get_metadata('DC', field)
        if values:
            new_book.set_unique_metadata('DC', field, values[0][0])

    for item in book.get_items():
        if item.content is None:
            print("NoneType, Skipping")
        else:
            new_book.add_item(item)
    # Carry the reading order over; without a spine the new book lists no chapters.
    # NOTE(review): assumes write_epub accepts the spine entries exactly as read_epub
    # produced them - confirm against ebooklib.
    new_book.spine = book.spine

    # Snapshot the documents first: _embed_images adds items to new_book, and the
    # item collection should not grow while it is being iterated.
    documents = list(new_book.get_items_of_type(ebooklib.ITEM_DOCUMENT))
    for item in documents:
        try:
            _embed_images(item, new_book)
        except TypeError:
            print("NoneType error, skipping ...")
        except Exception as e:
            print(f'Error while parsing images: {e}')

    # Path.stem copes with any path separator and with dots inside the file name.
    output_path = f"{Path(path_to_epub).stem}new.epub"
    try:
        new_book.toc = book.toc
        # NOTE(review): the source book's own navigation items were copied above as
        # well; check whether adding fresh ones here creates duplicates.
        new_book.add_item(epub.EpubNcx())
        new_book.add_item(epub.EpubNav())
        epub.write_epub(output_path, new_book)
        # Report the file that was written, not the one that was read.
        print(f'Wrote {output_path}')
    except Exception as e:
        print(f'Error while writing epub: {e}')
if __name__ == '__main__':
    # Fail with a usage hint instead of an IndexError traceback when no path is given.
    if len(sys.argv) < 2:
        sys.exit(f'Usage: {sys.argv[0]} PATH_TO_EPUB')
    main(sys.argv[1])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import logging | |
| import PIL | |
| from PIL import Image | |
| from io import BytesIO | |
| from base64 import b64decode | |
| import math | |
| import requests | |
| from typing import Tuple | |
| logger = logging.getLogger(__name__) | |
def get_image_from_url(
        url: str,
        image_format: str = "JPEG",
        compress_images: bool = False,
        max_image_size: int = 1_000_000,
        request_timeout: float = 30.0,
) -> Tuple[bytes, str, str]:
    """
    Download (or decode) an image and return its bytes plus naming information.

    ``data:image/...;base64,...`` URLs are decoded locally; every other URL is
    fetched over HTTP. Filepicker.io URLs are first rewritten to the Fiction.live CDN.

    :param url: The url of the image.
    :param image_format: The format to convert the image to.
    :param compress_images: Whether to compress the image or not.
    :param max_image_size: The maximum size of the image in bytes.
    :param request_timeout: Seconds to wait for the HTTP server before giving up.
    :return: A tuple of the image data, the image format and the image mime type.
        Any failure is logged and ``None`` is returned instead, so callers must
        check the result before unpacking it.
    """
    try:
        if url.startswith("https://www.filepicker.io/api/"):
            logger.warning("Filepicker.io image detected, converting to Fiction.live image. This might fail.")
            url = f"https://cdn3.fiction.live/fp/{url.split('/')[-1]}?&quality=95"
        elif url.startswith("data:image") and 'base64' in url:
            logger.info("Base64 image detected")
            head, base64data = url.split(',', 1)
            file_ext = str(head.split(';')[0].split('/')[1])
            imgdata = b64decode(base64data)
            # "jpg" is a file extension only: the Pillow format name and the MIME
            # subtype are both "jpeg".
            is_jpg = file_ext.lower() == "jpg"
            if compress_images:
                if file_ext.lower() == "gif":
                    logger.info("GIF images should not be compressed, skipping compression")
                else:
                    pil_format = "JPEG" if is_jpg else file_ext
                    compressed_base64_image = compress_image(BytesIO(imgdata), max_image_size, pil_format)
                    imgdata = PIL_Image_to_bytes(compressed_base64_image, pil_format)
            if file_ext.lower() not in ["jpg", "jpeg", "png", "gif"]:
                logger.info(f"Image format {file_ext} not supported, converting to {image_format}")
                # The converter needs a stream, not raw bytes, and it hands its input
                # back when the conversion fails - treat that as an error rather than
                # returning unconverted data labelled with the new format.
                source = BytesIO(imgdata)
                converted = _convert_to_new_format(source, image_format)
                if converted is source:
                    raise ValueError(f"could not convert {file_ext} image to {image_format}")
                return (
                    converted.read(),
                    image_format.lower(),
                    f"image/{image_format.lower()}"
                )
            return imgdata, file_ext, f"image/{'jpeg' if is_jpg else file_ext}"

        logger.info(f"Downloading image from {url}")
        # requests.get closes its session when done; the timeout prevents an
        # unresponsive host from hanging the run, and raise_for_status keeps error
        # pages from being parsed as images.
        img = requests.get(url, timeout=request_timeout)
        img.raise_for_status()
        image = BytesIO(img.content)
        PIL_image = Image.open(image)
        img_format = str(PIL_image.format)

        if img_format.lower() == "gif":
            # .get(): the 'version' key is not guaranteed to be present.
            if PIL_image.info.get('version') not in [b"GIF89a", "GIF89a"]:
                PIL_image.info['version'] = b"GIF89a"
            return PIL_Image_to_bytes(PIL_image, "GIF"), "gif", "image/gif"

        if compress_images:
            PIL_image = compress_image(image, max_image_size, img_format)

        # Accept "jpg" from callers while giving Pillow and the MIME type "jpeg".
        requested_jpg = image_format.lower() == "jpg"
        pil_format = "JPEG" if requested_jpg else image_format
        mime_subtype = "jpeg" if requested_jpg else image_format.lower()
        # Lower-case extension, matching what the base64 branch returns.
        return PIL_Image_to_bytes(PIL_image, pil_format), image_format.lower(), f"image/{mime_subtype}"

    except Exception as e:
        logger.warning("Encountered an error downloading image: " + str(e))
        return None
def compress_image(image: BytesIO, target_size: int, image_format: str) -> PIL.Image.Image:
    """
    Shrink an image's dimensions when its encoded size exceeds ``target_size``.

    :param image: The encoded image as an in-memory stream.
    :param target_size: The byte size above which the image is scaled down.
    :param image_format: The format used to measure (and log) the compressed size.
    :return: The image converted to RGBA, resized if it was too large.
    """
    original_byte_count = len(image.getvalue())
    logger.info(f"Image size: {get_size_format(original_byte_count)}")

    big_photo = Image.open(image).convert("RGBA")

    if original_byte_count <= target_size:
        logger.info(f"Image is less than {get_size_format(target_size)}, not compressing")
        return big_photo

    logger.info(f"Image is greater than {get_size_format(target_size)}, compressing")
    # NOTE(review): 2.8114 looks like an empirical pixels-per-byte ratio - confirm its origin.
    target_pixel_count = 2.8114 * target_size
    area_ratio = target_pixel_count / math.prod(big_photo.size)
    if area_ratio < 1:
        # area_ratio compares pixel COUNTS, so each side must scale by its square
        # root; applying the ratio to both sides would shrink the area by ratio**2.
        side_scale = math.sqrt(area_ratio)
        # Never let a side truncate to 0, which resize() cannot handle.
        x, y = (max(1, int(side_scale * dim)) for dim in big_photo.size)
        logger.info(f"Resizing image dimensions from {big_photo.size} to ({x}, {y})")
        sml_photo = big_photo.resize((x, y), resample=Image.LANCZOS)
    else:
        sml_photo = big_photo
    compressed_image_size = get_size_format(len(PIL_Image_to_bytes(sml_photo, image_format)))
    logger.info(f"Compressed image size: {compressed_image_size}")
    return sml_photo
def PIL_Image_to_bytes(
        pil_image: PIL.Image.Image,
        image_format: str
) -> bytes:
    """
    Encode a PIL image into bytes of the requested format.

    :param pil_image: The image to encode.
    :param image_format: The target format name, case-insensitive ("jpg" is accepted
        as an alias of "jpeg").
    :return: The encoded image data.
    """
    out_io = BytesIO()
    format_key = image_format.lower()

    if format_key.startswith("gif"):
        # Walk the remaining frames, compositing each onto the previous result so
        # every saved frame is a complete picture.
        frames = []
        current = pil_image.convert('RGBA')
        while True:
            try:
                frames.append(current)
                pil_image.seek(pil_image.tell() + 1)
                current = Image.alpha_composite(current, pil_image.convert('RGBA'))
            except EOFError:
                # seek() past the last frame signals the end of the animation.
                break
        frames[0].save(out_io, format=image_format, save_all=True, append_images=frames[1:], optimize=True, loop=0)
        return out_io.getvalue()

    if format_key in ("jpeg", "jpg"):
        # Pillow has no writer registered under "JPG"; always save as "JPEG".
        image_format = "JPEG"
        # JPEG has no alpha channel: flatten onto a white background first.
        rgba_image = pil_image.convert("RGBA")
        background_img = Image.new('RGBA', pil_image.size, "white")
        background_img.paste(rgba_image, (0, 0), rgba_image)
        pil_image = background_img.convert('RGB')

    pil_image.save(out_io, format=image_format, optimize=True, quality=95)
    return out_io.getvalue()
def get_size_format(b, factor=1000, suffix="B"):
    """
    Render a byte count as a short human-readable string with two decimals.

    With the default factor of 1000:
        1253656 => '1.25MB'
        1253656678 => '1.25GB'

    :param b: The size in bytes.
    :param factor: The step between two consecutive units.
    :param suffix: The text appended after the unit prefix.
    :return: The formatted size, e.g. '999.00B' or '1.00KB'.
    """
    prefixes = ("", "K", "M", "G", "T", "P", "E", "Z")
    amount = b
    position = 0
    while position < len(prefixes):
        if amount < factor:
            return f"{amount:.2f}{prefixes[position]}{suffix}"
        amount = amount / factor
        position += 1
    # Anything still too large after "Z" is reported in the next unit up.
    return f"{amount:.2f}Y{suffix}"
def _convert_to_new_format(image_bytestream, image_format: str):
    """
    Re-encode an image into ``image_format``.

    :param image_bytestream: The source image, as a binary stream or as raw bytes.
    :param image_format: The target format name, case-insensitive ("jpg" is accepted
        as an alias of "jpeg").
    :return: A new stream positioned at its start holding the converted image. If the
        conversion fails, the error is logged and the input stream is returned
        unchanged (rewound to its start; raw bytes input comes back wrapped in a
        stream).
    """
    # Raw bytes would be mistaken for a file name by Image.open; wrap them.
    if isinstance(image_bytestream, (bytes, bytearray)):
        image_bytestream = BytesIO(image_bytestream)

    # Pillow has no writer registered under "JPG".
    pil_format = "JPEG" if image_format.upper() == "JPG" else image_format.upper()

    new_image = BytesIO()
    try:
        source_image = Image.open(image_bytestream)
        if pil_format == "JPEG" and source_image.mode not in ("RGB", "L", "CMYK"):
            # JPEG has no alpha channel: flatten onto a white background first.
            rgba_image = source_image.convert("RGBA")
            background_img = Image.new("RGBA", rgba_image.size, "white")
            background_img.paste(rgba_image, (0, 0), rgba_image)
            source_image = background_img.convert("RGB")
        source_image.save(new_image, format=pil_format)
        new_image.name = f'cover.{image_format.lower()}'
        new_image.seek(0)
        return new_image
    except Exception as e:
        logger.info(f"Encountered an error converting image to {image_format}\nError: {e}")
        # Image.open may already have consumed part of the stream; rewind it so the
        # caller can still read the original data from the beginning.
        if hasattr(image_bytestream, "seek"):
            image_bytestream.seek(0)
        return image_bytestream
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment