Jemeni11 · May 2, 2023 15:18
diff --git a/ebook.py b/ebook.py
 import sys

 import ebooklib
 from ebooklib import epub
 from bs4 import BeautifulSoup
 from image import get_image_from_url


 def main(path_to_epub: str) -> None:
 	"""
 	Rebuild a FicHub epub so that its image placeholders become embedded images.

 	Every ``<p>`` whose text contains ``[img:`` is treated as a placeholder. The
 	``href`` of its ``<a>`` child is handed to ``get_image_from_url``; when a
 	tuple comes back, the image is added to the new book and the paragraph is
 	replaced by an ``<img>`` element pointing at it. The result is written to
 	``<name>new.epub`` (a relative path), where ``<name>`` is the input file
 	name without its final extension.

 	:param path_to_epub: The path to the FicHub epub file.
 	:return: None
 	"""
 	# Local import: only this function needs pathlib.
 	from pathlib import Path

 	try:
 		book = epub.read_epub(path_to_epub)
 	except FileNotFoundError:
 		print(f'File {path_to_epub} not found.')
 		return
 	print(f'Opened {path_to_epub}')

 	new_book = epub.EpubBook()
 	# Copy a metadata field only when the source book has it; indexing an
 	# empty result would raise IndexError.
 	for field in ('title', 'creator'):
 		values = book.get_metadata('DC', field)
 		if values:
 			new_book.set_unique_metadata('DC', field, values[0][0])
 	# new_book.set_cover(book.get_metadata('DC', 'cover'), book.read_cover())
 	for item in book.get_items():
 		if item.content is None:
 			print("NoneType, Skipping")
 		else:
 			new_book.add_item(item)

 	# Path.stem drops only the last suffix, so "my.story.epub" keeps "my.story".
 	file_name = Path(path_to_epub).stem

 	# Snapshot the documents first: images are added to new_book inside the
 	# loop, and the book should not grow while it is being iterated.
 	for item in list(new_book.get_items_of_type(ebooklib.ITEM_DOCUMENT)):
 		try:
 			soup = BeautifulSoup(item.content, "lxml-xml")
 			placeholders = [p for p in soup.find_all('p') if '[img:' in p.text]
 		except TypeError:
 			print("NoneType error, skipping ...")
 			continue
 		print(f'Found {len(placeholders)} images in {item.file_name}')

 		# Placeholders look like this:
 		# <p>[img: <a href="https://i.imgur.com/ABCDEF.jpg" rel="noopener noreferrer">...</a>]</p>
 		# The link we need is the href attribute of the <a> element.
 		for index, placeholder in enumerate(placeholders):
 			# Handle failures per image so one broken placeholder does not
 			# discard the images that were processed successfully.
 			try:
 				image_link = placeholder.a['href']
 				# Fetch once and reuse the result (a second call would
 				# download the image again).
 				image_data = get_image_from_url(image_link)
 				if not isinstance(image_data, tuple):
 					print(f"Error with image {index}, skipping ...")
 					continue
 				image_content, image_extension, image_media_type = image_data
 				image_path = f"images/{item.file_name}_{index}.{image_extension}"

 				img = epub.EpubItem(
 					uid=f"{item.file_name}_{index}",
 					file_name=image_path,
 					media_type=image_media_type,
 					content=image_content,
 				)
 				new_book.add_item(img)

 				# Insert a real element: replacing with a plain string
 				# would be serialized as escaped text, not as markup.
 				img_tag = soup.new_tag(
 					'img',
 					attrs={
 						'alt': f'Image {index} from {item.file_name}',
 						'class': 'img_center',
 						'src': image_path,
 					},
 				)
 				placeholder.replace_with(img_tag)
 			except Exception as e:
 				print(f'Error while parsing images: {e}')
 		item.content = str(soup)

 	output_path = f"{file_name}new.epub"
 	try:
 		new_book.toc = book.toc
 		new_book.add_item(epub.EpubNcx())
 		new_book.add_item(epub.EpubNav())
 		epub.write_epub(output_path, new_book)
 		# Report the file that was actually written, not the input path.
 		print(f'Wrote {output_path}')
 	except Exception as e:
 		print(f'Error while writing epub: {e}')


 if __name__ == '__main__':
 	# Exit with a usage hint instead of an IndexError when no path is given.
 	if len(sys.argv) < 2:
 		sys.exit(f'Usage: {sys.argv[0]} <path_to_epub>')
 	main(sys.argv[1])
diff --git a/image.py b/image.py
 import logging
 import PIL
 from PIL import Image
 from io import BytesIO
 from base64 import b64decode
 import math
 import requests
 from typing import Tuple

 logger = logging.getLogger(__name__)


 def get_image_from_url(
 		url: str,
 		image_format: str = "JPEG",
 		compress_images: bool = False,
 		max_image_size: int = 1_000_000
 ) -> Tuple[bytes, str, str]:
 	"""
 	Load an image from a URL or a base64 ``data:`` URI, ready for embedding.

 	``https://www.filepicker.io/api/`` URLs are rewritten to the
 	``cdn3.fiction.live`` host before downloading. Base64 ``data:image`` URIs
 	are decoded locally without a network request. Downloaded GIFs are always
 	re-encoded as GIF; any other downloaded image is encoded as ``image_format``.

 	:param url: The url of the image.
 	:param image_format: The format to convert the image to.
 	:param compress_images: Whether to compress the image or not.
 	:param max_image_size: The maximum size of the image in bytes.
 	:return: A tuple of the image data, the lower-cased file extension and the
 		image mime type. ``None`` is returned instead when the image cannot be
 		loaded or converted; the error is logged rather than raised.
 	"""
 	try:
 		if url.startswith("https://www.filepicker.io/api/"):
 			logger.warning("Filepicker.io image detected, converting to Fiction.live image. This might fail.")
 			url = f"https://cdn3.fiction.live/fp/{url.split('/')[-1]}?&quality=95"
 		elif url.startswith("data:image") and 'base64' in url:
 			logger.info("Base64 image detected")
 			# Split on the first comma only: everything after it is payload.
 			head, base64data = url.split(',', 1)
 			file_ext = head.split(';')[0].split('/')[1].lower()
 			# "jpg" is a file extension; the format and MIME subtype are "jpeg".
 			canonical_format = "jpeg" if file_ext == "jpg" else file_ext
 			imgdata = b64decode(base64data)
 			if compress_images:
 				if file_ext == "gif":
 					logger.info("GIF images should not be compressed, skipping compression")
 				else:
 					compressed_base64_image = compress_image(BytesIO(imgdata), max_image_size, canonical_format)
 					imgdata = PIL_Image_to_bytes(compressed_base64_image, canonical_format)

 			if file_ext not in ("jpg", "jpeg", "png", "gif"):
 				logger.info(f"Image format {file_ext} not supported, converting to {image_format}")
 				# Pass a stream: the helper opens its argument with Image.open,
 				# which cannot read image data from a raw bytes object.
 				source_stream = BytesIO(imgdata)
 				converted = _convert_to_new_format(source_stream, image_format)
 				if converted is source_stream:
 					# The helper hands its input back when conversion fails;
 					# do not label unconverted data as image_format.
 					return None
 				return (
 					converted.read(),
 					image_format.lower(),
 					f"image/{image_format.lower()}"
 				)
 			return imgdata, file_ext, f"image/{canonical_format}"

 		print(url)
 		# A timeout keeps one unresponsive host from hanging the whole run;
 		# raise_for_status turns HTTP error pages into a clear logged failure.
 		response = requests.get(url, timeout=30)
 		response.raise_for_status()
 		image = BytesIO(response.content)

 		PIL_image = Image.open(image)
 		img_format = str(PIL_image.format)

 		if img_format.lower() == "gif":
 			if PIL_image.info.get('version') not in [b"GIF89a", "GIF89a"]:
 				PIL_image.info['version'] = b"GIF89a"
 			return PIL_Image_to_bytes(PIL_image, "GIF"), "gif", "image/gif"

 		if compress_images:
 			PIL_image = compress_image(image, max_image_size, img_format)

 		# Lower-case the extension so both branches return the same shape.
 		return PIL_Image_to_bytes(PIL_image, image_format), image_format.lower(), f"image/{image_format.lower()}"

 	except Exception as e:
 		# Best effort: callers treat a non-tuple result as "skip this image".
 		logger.warning("Encountered an error downloading image: " + str(e))
 		return None


 def compress_image(image: BytesIO, target_size: int, image_format: str) -> PIL.Image.Image:
 	"""
 	Downscale an image whose encoded size exceeds ``target_size``.

 	The stream is opened and converted to RGBA. If its byte length is at most
 	``target_size`` that image is returned unchanged; otherwise it is resized
 	so its pixel count is about ``2.8114 * target_size`` (it is never upscaled).

 	:param image: In-memory stream holding the encoded image.
 	:param target_size: Size threshold in bytes.
 	:param image_format: Format used only to report the re-encoded size in the log.
 	:return: The RGBA image, resized when the input was over the threshold.
 	"""
 	original_byte_count = len(image.getvalue())
 	logger.info(f"Image size: {get_size_format(original_byte_count)}")

 	big_photo = Image.open(image).convert("RGBA")

 	if original_byte_count <= target_size:
 		logger.info(f"Image is less than {get_size_format(target_size)}, not compressing")
 		return big_photo

 	logger.info(f"Image is greater than {get_size_format(target_size)}, compressing")
 	# NOTE(review): 2.8114 looks like an empirical pixels-per-byte ratio — confirm.
 	target_pixel_count = 2.8114 * target_size
 	scale_factor = target_pixel_count / math.prod(big_photo.size)
 	if scale_factor < 1:
 		# scale_factor is a ratio of pixel counts (areas). Each side must
 		# shrink by its square root; applying it to both sides directly would
 		# reduce the pixel count by scale_factor squared.
 		side_scale = math.sqrt(scale_factor)
 		# Clamp to 1 so a very small factor cannot produce a zero-sized image.
 		x, y = (max(1, int(side_scale * dim)) for dim in big_photo.size)
 		logger.info(f"Resizing image dimensions from {big_photo.size} to ({x}, {y})")
 		sml_photo = big_photo.resize((x, y), resample=Image.LANCZOS)
 	else:
 		sml_photo = big_photo
 	# Re-encoding is only needed for the log line, so skip it when that
 	# line would be discarded anyway.
 	if logger.isEnabledFor(logging.INFO):
 		compressed_image_size = get_size_format(len(PIL_Image_to_bytes(sml_photo, image_format)))
 		logger.info(f"Compressed image size: {compressed_image_size}")
 	return sml_photo


 def PIL_Image_to_bytes(
 		pil_image: PIL.Image.Image,
 		image_format: str
 ) -> bytes:
 	"""
 	Encode a PIL image as ``image_format`` and return the encoded bytes.

 	GIF output is saved as a looping multi-frame file in which each frame is
 	the previous output frame with the next source frame composited on top.
 	JPEG output is first flattened onto a white background. Every other
 	format is saved as-is with ``optimize=True`` and ``quality=95``.

 	:param pil_image: The image to encode.
 	:param image_format: Target format name; compared case-insensitively.
 	:return: The encoded image data.
 	"""
 	buffer = BytesIO()
 	format_key = image_format.lower()

 	if format_key.startswith("gif"):
 		frames = [pil_image.convert('RGBA')]
 		try:
 			# Step through the source frames until seek() signals the end.
 			while True:
 				pil_image.seek(pil_image.tell() + 1)
 				frames.append(Image.alpha_composite(frames[-1], pil_image.convert('RGBA')))
 		except EOFError:
 			pass
 		first_frame, *later_frames = frames
 		first_frame.save(buffer, format=image_format, save_all=True, append_images=later_frames, optimize=True, loop=0)
 		return buffer.getvalue()

 	if format_key in ["jpeg", "jpg"]:
 		# Use the image's own alpha channel as the paste mask so transparent
 		# areas show the white canvas.
 		rgba_image = pil_image.convert("RGBA")
 		canvas = Image.new('RGBA', pil_image.size, "white")
 		canvas.paste(rgba_image, (0, 0), rgba_image)
 		pil_image = canvas.convert('RGB')

 	pil_image.save(buffer, format=image_format, optimize=True, quality=95)
 	return buffer.getvalue()


 def get_size_format(b, factor=1000, suffix="B"):
 	"""
 	Render a byte count as a short human-readable string with two decimals.

 	The value is divided by ``factor`` until it is smaller than ``factor`` or
 	the unit prefixes run out, then formatted with the matching prefix.
 	e.g. with the default factor of 1000:
 		1253656 => '1.25MB'
 		1253656678 => '1.25GB'
 	:param b: The size in bytes.
 	:param factor: The factor to divide by.
 	:param suffix: The suffix to add to the end.
 	:return: The formatted size, e.g. '1.00KB'.
 	"""
 	remaining = b
 	for prefix in ("", *"KMGTPEZ"):
 		if remaining < factor:
 			return f"{remaining:.2f}{prefix}{suffix}"
 		remaining = remaining / factor
 	# Still not below factor after the last listed prefix: report in "Y".
 	return f"{remaining:.2f}Y{suffix}"


 def _convert_to_new_format(image_bytestream, image_format: str):
 	"""
 	Re-encode an image into ``image_format`` and return it as a new stream.

 	:param image_bytestream: The encoded source image, either as a file-like
 		object or as raw ``bytes``.
 	:param image_format: Target format name (upper-cased before saving).
 	:return: A ``BytesIO`` positioned at 0 holding the converted image and
 		named ``cover.<format>``. If conversion fails, the error is logged and
 		the source stream is returned instead, rewound to 0 when it supports
 		``seek``.
 	"""
 	# Image.open expects a filename or a file object, so raw bytes are wrapped
 	# in a stream before opening.
 	if isinstance(image_bytestream, (bytes, bytearray)):
 		image_bytestream = BytesIO(image_bytestream)
 	new_image = BytesIO()
 	try:
 		source_image = Image.open(image_bytestream)
 		# JPEG has no alpha channel or palette; convert such images to RGB
 		# first so the save does not fail.
 		if image_format.upper() == "JPEG" and source_image.mode not in ("RGB", "L", "CMYK"):
 			source_image = source_image.convert("RGB")
 		source_image.save(new_image, format=image_format.upper())
 		new_image.name = f'cover.{image_format.lower()}'
 		new_image.seek(0)
 		return new_image
 	except Exception as e:
 		logger.info(f"Encountered an error converting image to {image_format}\nError: {e}")
 		# The failed attempt may have consumed part of the stream; rewind it
 		# so the caller reads the original data from the start.
 		if hasattr(image_bytestream, "seek"):
 			image_bytestream.seek(0)
 		return image_bytestream
	import sys

	import ebooklib
	from ebooklib import epub
	from bs4 import BeautifulSoup
	from image import get_image_from_url


	def main(path_to_epub: str) -> None:
		"""
		Rebuild a FicHub epub so that its image placeholders become embedded images.

		Every ``<p>`` whose text contains ``[img:`` is treated as a placeholder. The
		``href`` of its ``<a>`` child is handed to ``get_image_from_url``; when a
		tuple comes back, the image is added to the new book and the paragraph is
		replaced by an ``<img>`` element pointing at it. The result is written to
		``<name>new.epub`` (a relative path), where ``<name>`` is the input file
		name without its final extension.

		:param path_to_epub: The path to the FicHub epub file.
		:return: None
		"""
		# Local import: only this function needs pathlib.
		from pathlib import Path

		try:
			book = epub.read_epub(path_to_epub)
		except FileNotFoundError:
			print(f'File {path_to_epub} not found.')
			return
		print(f'Opened {path_to_epub}')

		new_book = epub.EpubBook()
		# Copy a metadata field only when the source book has it; indexing an
		# empty result would raise IndexError.
		for field in ('title', 'creator'):
			values = book.get_metadata('DC', field)
			if values:
				new_book.set_unique_metadata('DC', field, values[0][0])
		# new_book.set_cover(book.get_metadata('DC', 'cover'), book.read_cover())
		for item in book.get_items():
			if item.content is None:
				print("NoneType, Skipping")
			else:
				new_book.add_item(item)

		# Path.stem drops only the last suffix, so "my.story.epub" keeps "my.story".
		file_name = Path(path_to_epub).stem

		# Snapshot the documents first: images are added to new_book inside the
		# loop, and the book should not grow while it is being iterated.
		for item in list(new_book.get_items_of_type(ebooklib.ITEM_DOCUMENT)):
			try:
				soup = BeautifulSoup(item.content, "lxml-xml")
				placeholders = [p for p in soup.find_all('p') if '[img:' in p.text]
			except TypeError:
				print("NoneType error, skipping ...")
				continue
			print(f'Found {len(placeholders)} images in {item.file_name}')

			# Placeholders look like this:
			# <p>[img: <a href="https://i.imgur.com/ABCDEF.jpg" rel="noopener noreferrer">...</a>]</p>
			# The link we need is the href attribute of the <a> element.
			for index, placeholder in enumerate(placeholders):
				# Handle failures per image so one broken placeholder does not
				# discard the images that were processed successfully.
				try:
					image_link = placeholder.a['href']
					# Fetch once and reuse the result (a second call would
					# download the image again).
					image_data = get_image_from_url(image_link)
					if not isinstance(image_data, tuple):
						print(f"Error with image {index}, skipping ...")
						continue
					image_content, image_extension, image_media_type = image_data
					image_path = f"images/{item.file_name}_{index}.{image_extension}"

					img = epub.EpubItem(
						uid=f"{item.file_name}_{index}",
						file_name=image_path,
						media_type=image_media_type,
						content=image_content,
					)
					new_book.add_item(img)

					# Insert a real element: replacing with a plain string
					# would be serialized as escaped text, not as markup.
					img_tag = soup.new_tag(
						'img',
						attrs={
							'alt': f'Image {index} from {item.file_name}',
							'class': 'img_center',
							'src': image_path,
						},
					)
					placeholder.replace_with(img_tag)
				except Exception as e:
					print(f'Error while parsing images: {e}')
			item.content = str(soup)

		output_path = f"{file_name}new.epub"
		try:
			new_book.toc = book.toc
			new_book.add_item(epub.EpubNcx())
			new_book.add_item(epub.EpubNav())
			epub.write_epub(output_path, new_book)
			# Report the file that was actually written, not the input path.
			print(f'Wrote {output_path}')
		except Exception as e:
			print(f'Error while writing epub: {e}')


	if __name__ == '__main__':
		# Exit with a usage hint instead of an IndexError when no path is given.
		if len(sys.argv) < 2:
			sys.exit(f'Usage: {sys.argv[0]} <path_to_epub>')
		main(sys.argv[1])
	import logging
	import PIL
	from PIL import Image
	from io import BytesIO
	from base64 import b64decode
	import math
	import requests
	from typing import Tuple

	logger = logging.getLogger(__name__)


	def get_image_from_url(
			url: str,
			image_format: str = "JPEG",
			compress_images: bool = False,
			max_image_size: int = 1_000_000
	) -> Tuple[bytes, str, str]:
		"""
		Load an image from a URL or a base64 ``data:`` URI, ready for embedding.

		``https://www.filepicker.io/api/`` URLs are rewritten to the
		``cdn3.fiction.live`` host before downloading. Base64 ``data:image`` URIs
		are decoded locally without a network request. Downloaded GIFs are always
		re-encoded as GIF; any other downloaded image is encoded as ``image_format``.

		:param url: The url of the image.
		:param image_format: The format to convert the image to.
		:param compress_images: Whether to compress the image or not.
		:param max_image_size: The maximum size of the image in bytes.
		:return: A tuple of the image data, the lower-cased file extension and the
			image mime type. ``None`` is returned instead when the image cannot be
			loaded or converted; the error is logged rather than raised.
		"""
		try:
			if url.startswith("https://www.filepicker.io/api/"):
				logger.warning("Filepicker.io image detected, converting to Fiction.live image. This might fail.")
				url = f"https://cdn3.fiction.live/fp/{url.split('/')[-1]}?&quality=95"
			elif url.startswith("data:image") and 'base64' in url:
				logger.info("Base64 image detected")
				# Split on the first comma only: everything after it is payload.
				head, base64data = url.split(',', 1)
				file_ext = head.split(';')[0].split('/')[1].lower()
				# "jpg" is a file extension; the format and MIME subtype are "jpeg".
				canonical_format = "jpeg" if file_ext == "jpg" else file_ext
				imgdata = b64decode(base64data)
				if compress_images:
					if file_ext == "gif":
						logger.info("GIF images should not be compressed, skipping compression")
					else:
						compressed_base64_image = compress_image(BytesIO(imgdata), max_image_size, canonical_format)
						imgdata = PIL_Image_to_bytes(compressed_base64_image, canonical_format)

				if file_ext not in ("jpg", "jpeg", "png", "gif"):
					logger.info(f"Image format {file_ext} not supported, converting to {image_format}")
					# Pass a stream: the helper opens its argument with Image.open,
					# which cannot read image data from a raw bytes object.
					source_stream = BytesIO(imgdata)
					converted = _convert_to_new_format(source_stream, image_format)
					if converted is source_stream:
						# The helper hands its input back when conversion fails;
						# do not label unconverted data as image_format.
						return None
					return (
						converted.read(),
						image_format.lower(),
						f"image/{image_format.lower()}"
					)
				return imgdata, file_ext, f"image/{canonical_format}"

			print(url)
			# A timeout keeps one unresponsive host from hanging the whole run;
			# raise_for_status turns HTTP error pages into a clear logged failure.
			response = requests.get(url, timeout=30)
			response.raise_for_status()
			image = BytesIO(response.content)

			PIL_image = Image.open(image)
			img_format = str(PIL_image.format)

			if img_format.lower() == "gif":
				if PIL_image.info.get('version') not in [b"GIF89a", "GIF89a"]:
					PIL_image.info['version'] = b"GIF89a"
				return PIL_Image_to_bytes(PIL_image, "GIF"), "gif", "image/gif"

			if compress_images:
				PIL_image = compress_image(image, max_image_size, img_format)

			# Lower-case the extension so both branches return the same shape.
			return PIL_Image_to_bytes(PIL_image, image_format), image_format.lower(), f"image/{image_format.lower()}"

		except Exception as e:
			# Best effort: callers treat a non-tuple result as "skip this image".
			logger.warning("Encountered an error downloading image: " + str(e))
			return None


	def compress_image(image: BytesIO, target_size: int, image_format: str) -> PIL.Image.Image:
		"""
		Downscale an image whose encoded size exceeds ``target_size``.

		The stream is opened and converted to RGBA. If its byte length is at most
		``target_size`` that image is returned unchanged; otherwise it is resized
		so its pixel count is about ``2.8114 * target_size`` (it is never upscaled).

		:param image: In-memory stream holding the encoded image.
		:param target_size: Size threshold in bytes.
		:param image_format: Format used only to report the re-encoded size in the log.
		:return: The RGBA image, resized when the input was over the threshold.
		"""
		original_byte_count = len(image.getvalue())
		logger.info(f"Image size: {get_size_format(original_byte_count)}")

		big_photo = Image.open(image).convert("RGBA")

		if original_byte_count <= target_size:
			logger.info(f"Image is less than {get_size_format(target_size)}, not compressing")
			return big_photo

		logger.info(f"Image is greater than {get_size_format(target_size)}, compressing")
		# NOTE(review): 2.8114 looks like an empirical pixels-per-byte ratio — confirm.
		target_pixel_count = 2.8114 * target_size
		scale_factor = target_pixel_count / math.prod(big_photo.size)
		if scale_factor < 1:
			# scale_factor is a ratio of pixel counts (areas). Each side must
			# shrink by its square root; applying it to both sides directly would
			# reduce the pixel count by scale_factor squared.
			side_scale = math.sqrt(scale_factor)
			# Clamp to 1 so a very small factor cannot produce a zero-sized image.
			x, y = (max(1, int(side_scale * dim)) for dim in big_photo.size)
			logger.info(f"Resizing image dimensions from {big_photo.size} to ({x}, {y})")
			sml_photo = big_photo.resize((x, y), resample=Image.LANCZOS)
		else:
			sml_photo = big_photo
		# Re-encoding is only needed for the log line, so skip it when that
		# line would be discarded anyway.
		if logger.isEnabledFor(logging.INFO):
			compressed_image_size = get_size_format(len(PIL_Image_to_bytes(sml_photo, image_format)))
			logger.info(f"Compressed image size: {compressed_image_size}")
		return sml_photo


	def PIL_Image_to_bytes(
			pil_image: PIL.Image.Image,
			image_format: str
	) -> bytes:
		"""
		Encode a PIL image as ``image_format`` and return the encoded bytes.

		GIF output is saved as a looping multi-frame file in which each frame is
		the previous output frame with the next source frame composited on top.
		JPEG output is first flattened onto a white background. Every other
		format is saved as-is with ``optimize=True`` and ``quality=95``.

		:param pil_image: The image to encode.
		:param image_format: Target format name; compared case-insensitively.
		:return: The encoded image data.
		"""
		buffer = BytesIO()
		format_key = image_format.lower()

		if format_key.startswith("gif"):
			frames = [pil_image.convert('RGBA')]
			try:
				# Step through the source frames until seek() signals the end.
				while True:
					pil_image.seek(pil_image.tell() + 1)
					frames.append(Image.alpha_composite(frames[-1], pil_image.convert('RGBA')))
			except EOFError:
				pass
			first_frame, *later_frames = frames
			first_frame.save(buffer, format=image_format, save_all=True, append_images=later_frames, optimize=True, loop=0)
			return buffer.getvalue()

		if format_key in ["jpeg", "jpg"]:
			# Use the image's own alpha channel as the paste mask so transparent
			# areas show the white canvas.
			rgba_image = pil_image.convert("RGBA")
			canvas = Image.new('RGBA', pil_image.size, "white")
			canvas.paste(rgba_image, (0, 0), rgba_image)
			pil_image = canvas.convert('RGB')

		pil_image.save(buffer, format=image_format, optimize=True, quality=95)
		return buffer.getvalue()


	def get_size_format(b, factor=1000, suffix="B"):
		"""
		Render a byte count as a short human-readable string with two decimals.

		The value is divided by ``factor`` until it is smaller than ``factor`` or
		the unit prefixes run out, then formatted with the matching prefix.
		e.g. with the default factor of 1000:
			1253656 => '1.25MB'
			1253656678 => '1.25GB'
		:param b: The size in bytes.
		:param factor: The factor to divide by.
		:param suffix: The suffix to add to the end.
		:return: The formatted size, e.g. '1.00KB'.
		"""
		remaining = b
		for prefix in ("", *"KMGTPEZ"):
			if remaining < factor:
				return f"{remaining:.2f}{prefix}{suffix}"
			remaining = remaining / factor
		# Still not below factor after the last listed prefix: report in "Y".
		return f"{remaining:.2f}Y{suffix}"


	def _convert_to_new_format(image_bytestream, image_format: str):
		"""
		Re-encode an image into ``image_format`` and return it as a new stream.

		:param image_bytestream: The encoded source image, either as a file-like
			object or as raw ``bytes``.
		:param image_format: Target format name (upper-cased before saving).
		:return: A ``BytesIO`` positioned at 0 holding the converted image and
			named ``cover.<format>``. If conversion fails, the error is logged and
			the source stream is returned instead, rewound to 0 when it supports
			``seek``.
		"""
		# Image.open expects a filename or a file object, so raw bytes are wrapped
		# in a stream before opening.
		if isinstance(image_bytestream, (bytes, bytearray)):
			image_bytestream = BytesIO(image_bytestream)
		new_image = BytesIO()
		try:
			source_image = Image.open(image_bytestream)
			# JPEG has no alpha channel or palette; convert such images to RGB
			# first so the save does not fail.
			if image_format.upper() == "JPEG" and source_image.mode not in ("RGB", "L", "CMYK"):
				source_image = source_image.convert("RGB")
			source_image.save(new_image, format=image_format.upper())
			new_image.name = f'cover.{image_format.lower()}'
			new_image.seek(0)
			return new_image
		except Exception as e:
			logger.info(f"Encountered an error converting image to {image_format}\nError: {e}")
			# The failed attempt may have consumed part of the stream; rewind it
			# so the caller reads the original data from the start.
			if hasattr(image_bytestream, "seek"):
				image_bytestream.seek(0)
			return image_bytestream