Skip to content

Instantly share code, notes, and snippets.

@Ghost---Shadow
Created November 3, 2025 06:47
Show Gist options
  • Select an option

  • Save Ghost---Shadow/97bb7a7de7cca7e968ec90cc104da97e to your computer and use it in GitHub Desktop.

Select an option

Save Ghost---Shadow/97bb7a7de7cca7e968ec90cc104da97e to your computer and use it in GitHub Desktop.
Compress PDF files by converting their pages to JPEG images, to bring them down to under 2 MB
#!/usr/bin/env python3
"""
PDF Compression Script - Using PyMuPDF to convert pages to JPG and rebuild
Installation:
pip install PyMuPDF Pillow
Usage:
python compress_pdf_final.py
"""
import os
import sys
import fitz # PyMuPDF
from PIL import Image
from io import BytesIO
def compress_pdf_to_jpg(input_path, output_path, dpi=150, quality=75):
    """
    Render every page of a PDF to a JPEG image and rebuild a PDF from them.

    Each page is rendered with PyMuPDF at ``dpi``, flattened onto a white
    background when the rendered image carries an alpha channel, encoded as
    JPEG, and inserted into a new page that has the same dimensions as the
    source page. The output document contains only these inserted images.

    Both documents are closed even if rendering or saving fails.

    Args:
        input_path: Input PDF file path
        output_path: Output PDF file path
        dpi: Resolution for rendering (lower = smaller file)
        quality: JPEG quality (1-100)
    """
    # PyMuPDF renders at 72 DPI by default, so dpi / 72 is the zoom factor.
    # It is identical for every page, so the matrix is built once.
    zoom = dpi / 72
    render_matrix = fitz.Matrix(zoom, zoom)

    input_pdf = fitz.open(input_path)
    try:
        page_count = len(input_pdf)
        print(f"Processing {page_count} pages...")

        # An empty fitz.open() creates a new, empty PDF.
        output_pdf = fitz.open()
        try:
            for page_number, page in enumerate(input_pdf, start=1):
                print(f" Processing page {page_number}/{page_count}", end='\r')

                # Render the page, then hand it to Pillow via an in-memory PNG.
                pix = page.get_pixmap(matrix=render_matrix)
                img = _flatten_to_rgb(Image.open(BytesIO(pix.tobytes("png"))))

                # Compress to JPEG
                jpg_buffer = BytesIO()
                img.save(jpg_buffer, format='JPEG', quality=quality, optimize=True)

                # New page with the same dimensions as the original; the
                # JPEG is stretched over the whole page rectangle.
                new_page = output_pdf.new_page(
                    width=page.rect.width,
                    height=page.rect.height,
                )
                new_page.insert_image(new_page.rect, stream=jpg_buffer.getvalue())

            print(f"\nProcessed {page_count} pages")
            print("Saving compressed PDF...")
            output_pdf.save(output_path, garbage=4, deflate=True, clean=True)
        finally:
            output_pdf.close()
    finally:
        input_pdf.close()

    print(f"✓ PDF saved to: {output_path}")


def _flatten_to_rgb(img):
    """Return ``img`` as an RGB image, pasting any alpha channel onto white."""
    # JPEG has no alpha channel, so transparency must be resolved first.
    if img.mode == 'P':
        img = img.convert('RGBA')
    if img.mode in ('RGBA', 'LA'):
        background = Image.new('RGB', img.size, (255, 255, 255))
        # The last band of RGBA / LA is the alpha channel; use it as the mask.
        background.paste(img, mask=img.split()[-1])
        return background
    if img.mode != 'RGB':
        return img.convert('RGB')
    return img
def get_file_size_mb(filepath):
    """Return the size of the file at ``filepath`` in MB (bytes / 1024 / 1024)."""
    bytes_per_mb = 1024 * 1024
    size_in_bytes = os.stat(filepath).st_size
    return size_in_bytes / bytes_per_mb
def main(argv=None):
    """
    Compress a PDF below a target size by trying progressively lossier settings.

    Each compression level writes ``<input stem>_compressed_<level>.pdf``.
    The first level whose output is at or below the target is renamed to
    ``<input stem>_compressed.pdf``. If no level reaches the target, the
    smallest file produced during this run is kept under that name instead.
    Only files written by this run are considered as results, so stale or
    unrelated files sharing the name prefix are never promoted.

    Args:
        argv: Optional argument list ``[input_pdf, target_size_mb]``; both
            items are optional. Defaults to ``sys.argv[1:]``. With no
            arguments the built-in input file name and a 2.0 MB target
            are used.

    Returns:
        0 if a compressed file was saved (even if it is still above the
        target); 1 if the arguments are invalid, the input file is missing,
        or no compression level produced an output.
    """
    import traceback

    args = sys.argv[1:] if argv is None else list(argv)
    input_pdf = args[0] if args else "Deed of Lease@Dr. A. K. Nanda.pdf"
    target_size_mb = 2.0
    if len(args) > 1:
        try:
            target_size_mb = float(args[1])
        except ValueError:
            print(f"Error: Invalid target size '{args[1]}' (expected a number of MB)")
            return 1
    # Written as "not (x > 0)" so that NaN is rejected as well; a positive
    # target also keeps the ratio printed below from dividing by zero.
    if not (target_size_mb > 0):
        print(f"Error: Target size must be greater than 0 MB (got {target_size_mb})")
        return 1

    if not os.path.exists(input_pdf):
        print(f"Error: Input file '{input_pdf}' not found!")
        return 1

    original_size = get_file_size_mb(input_pdf)
    print(f"Original file size: {original_size:.2f} MB")
    print(f"Target size: {target_size_mb} MB")
    print(f"Compression needed: {original_size / target_size_mb:.1f}x\n")

    # Try different compression levels
    # (dpi, quality, level_name)
    compression_levels = [
        (150, 80, "high"),
        (130, 70, "medium-high"),
        (110, 60, "medium"),
        (90, 50, "medium-low"),
        (75, 45, "low"),
        (60, 40, "very-low"),
        (50, 35, "ultra-low"),
    ]

    # All output names derive from the input name without its extension.
    base_name = os.path.splitext(input_pdf)[0]
    final_output = f"{base_name}_compressed.pdf"
    level_outputs = [
        f"{base_name}_compressed_{level_name}.pdf"
        for _, _, level_name in compression_levels
    ]
    # Files successfully written during this run, mapped to their size in MB.
    produced = {}

    for (dpi, quality, level_name), output_pdf in zip(compression_levels, level_outputs):
        print("═" * 60)
        print(f"Attempting: {level_name} (dpi={dpi}, quality={quality})")
        print("═" * 60)
        try:
            compress_pdf_to_jpg(input_pdf, output_pdf, dpi, quality)
            compressed_size = get_file_size_mb(output_pdf)
        except Exception as e:
            # One failing level must not abort the run; report it and move on.
            print(f"✗ Error during compression: {e}\n")
            traceback.print_exc()
            continue

        produced[output_pdf] = compressed_size
        compression_ratio = (
            (1 - compressed_size / original_size) * 100 if original_size else 0.0
        )
        print(f"\n✓ Compressed file size: {compressed_size:.2f} MB")
        print(f"✓ Compression ratio: {compression_ratio:.1f}%")

        if compressed_size > target_size_mb:
            print(f"✗ Still above target ({compressed_size:.2f} MB > {target_size_mb} MB)")
            print("Trying lower quality...\n")
            continue

        # os.replace overwrites an existing final output in a single step.
        os.replace(output_pdf, final_output)
        print(f"\n{'=' * 60}")
        print(f"✓ SUCCESS! File is below {target_size_mb} MB")
        print(f"{'=' * 60}")
        print(f"Original: {original_size:.2f} MB")
        print(f"Compressed: {compressed_size:.2f} MB")
        print(f"Savings: {original_size - compressed_size:.2f} MB ({compression_ratio:.1f}%)")
        print(f"\nOutput saved as: {final_output}")
        # Clean up intermediate files
        _remove_files(level_outputs)
        return 0

    # If we get here, show the best result
    print(f"\n{'=' * 60}")
    print("⚠ Could not achieve target size")
    print(f"{'=' * 60}")

    if not produced:
        print("No compressed files were created.")
        # Drop any partial outputs left behind by failed levels.
        _remove_files(level_outputs)
        return 1

    best_path, best_size = min(produced.items(), key=lambda item: item[1])
    print(f"Best result: {best_path}")
    print(f"Size: {best_size:.2f} MB (target was {target_size_mb} MB)")
    print(f"This is {best_size - target_size_mb:.2f} MB over the target.")
    os.replace(best_path, final_output)
    print(f"\nSaved as: {final_output}")
    # The best file has already been moved, so everything left is disposable.
    _remove_files(level_outputs)
    return 0


def _remove_files(paths):
    """Delete each path in ``paths``, ignoring those that do not exist."""
    for path in paths:
        try:
            os.remove(path)
        except FileNotFoundError:
            pass
# Script entry point: run main() and pass its return value to sys.exit()
# so it becomes the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment