Created
November 3, 2025 06:47
-
-
Save Ghost---Shadow/97bb7a7de7cca7e968ec90cc104da97e to your computer and use it in GitHub Desktop.
Compress PDF files by converting their pages to JPEG images to bring them under 2 MB
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| PDF Compression Script - Using PyMuPDF to convert pages to JPG and rebuild | |
| Installation: | |
| pip install PyMuPDF Pillow | |
| Usage: | |
| python compress_pdf_final.py | |
| """ | |
| import os | |
| import sys | |
| import fitz # PyMuPDF | |
| from PIL import Image | |
| from io import BytesIO | |
def compress_pdf_to_jpg(input_path, output_path, dpi=150, quality=75):
    """
    Rasterize every page of a PDF to JPEG and rebuild a new PDF from the images.

    Each page is rendered with PyMuPDF at ``dpi``, flattened onto a white
    background if the rendered image carries transparency, re-encoded as JPEG
    at ``quality`` and inserted into a new page that has the same dimensions
    as the source page. Progress is reported on stdout.

    Args:
        input_path: Input PDF file path
        output_path: Output PDF file path
        dpi: Resolution for rendering (lower = smaller file); must be > 0
        quality: JPEG quality (1-100)

    Raises:
        ValueError: If ``dpi`` is not positive or ``quality`` is outside 1-100.
    """
    if dpi <= 0:
        raise ValueError(f"dpi must be positive, got {dpi!r}")
    if not 1 <= quality <= 100:
        raise ValueError(f"quality must be between 1 and 100, got {quality!r}")

    # PyMuPDF renders at 72 DPI by default, so the zoom factor is the ratio
    # of the requested DPI to 72. It is the same for every page, so build the
    # transformation matrix once.
    zoom = dpi / 72
    mat = fitz.Matrix(zoom, zoom)

    input_pdf = fitz.open(input_path)
    try:
        # A document opened without arguments is a new, empty PDF.
        output_pdf = fitz.open()
        try:
            page_count = len(input_pdf)
            print(f"Processing {page_count} pages...")

            for page_num, page in enumerate(input_pdf, start=1):
                print(f" Processing page {page_num}/{page_count}", end='\r')

                # Render the page to a pixmap and hand it to Pillow via PNG
                pix = page.get_pixmap(matrix=mat)
                img = Image.open(BytesIO(pix.tobytes("png")))

                # JPEG has no alpha channel: flatten transparency onto white
                if img.mode == 'P':
                    img = img.convert('RGBA')
                if img.mode in ('RGBA', 'LA'):
                    background = Image.new('RGB', img.size, (255, 255, 255))
                    background.paste(img, mask=img.split()[-1])
                    img = background
                elif img.mode != 'RGB':
                    img = img.convert('RGB')

                # Compress to JPEG
                jpg_buffer = BytesIO()
                img.save(jpg_buffer, format='JPEG', quality=quality, optimize=True)

                # New page with the same dimensions as the original, filled
                # entirely by the compressed image
                new_page = output_pdf.new_page(
                    width=page.rect.width,
                    height=page.rect.height,
                )
                new_page.insert_image(new_page.rect, stream=jpg_buffer.getvalue())

            print(f"\nProcessed {page_count} pages")

            # Save the output PDF
            print("Saving compressed PDF...")
            output_pdf.save(output_path, garbage=4, deflate=True, clean=True)
        finally:
            # Always release the documents, even when a page fails, so the
            # caller can retry with other settings without leaking handles.
            output_pdf.close()
    finally:
        input_pdf.close()

    print(f"✓ PDF saved to: {output_path}")
def get_file_size_mb(filepath: "str | os.PathLike[str]") -> float:
    """
    Get file size in MB (1 MB = 1024 * 1024 bytes).

    Args:
        filepath: Path of the file to measure.

    Returns:
        The file size in MB as a float.

    Raises:
        OSError: If the file does not exist or cannot be accessed.
    """
    bytes_per_mb = 1024 * 1024
    return os.path.getsize(filepath) / bytes_per_mb
def main(argv=None):
    """
    Compress a PDF below a target size by trying progressively lower settings.

    Each compression level (DPI / JPEG quality pair) is attempted in order,
    from highest to lowest quality. The first result at or below the target
    size is renamed to ``<input stem>_compressed.pdf`` and the run stops. If
    no level reaches the target, the smallest file produced in this run is
    kept under that name instead.

    Args:
        argv: Optional command-line arguments without the program name:
            ``[input_pdf [target_size_mb]]``. Defaults to ``sys.argv[1:]``.
            Missing values fall back to the built-in input file name and a
            2.0 MB target.

    Returns:
        0 when an output file was written (even if it is still above the
        target); 1 when the arguments or input file are unusable or no
        compressed file could be produced.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    input_pdf = args[0] if args else "Deed of Lease@Dr. A. K. Nanda.pdf"
    target_size_mb = 2.0
    if len(args) > 1:
        try:
            target_size_mb = float(args[1])
        except ValueError:
            print(f"Error: Target size '{args[1]}' is not a number!")
            return 1
        # "not > 0" also rejects NaN
        if not target_size_mb > 0:
            print(f"Error: Target size must be positive, got '{args[1]}'!")
            return 1

    if not os.path.isfile(input_pdf):
        print(f"Error: Input file '{input_pdf}' not found!")
        return 1

    original_size = get_file_size_mb(input_pdf)
    if original_size == 0:
        # Guards the ratio computations below against division by zero
        print(f"Error: Input file '{input_pdf}' is empty!")
        return 1

    print(f"Original file size: {original_size:.2f} MB")
    print(f"Target size: {target_size_mb} MB")
    print(f"Compression needed: {original_size / target_size_mb:.1f}x\n")

    # Try different compression levels
    # (dpi, quality, level_name)
    compression_levels = [
        (150, 80, "high"),
        (130, 70, "medium-high"),
        (110, 60, "medium"),
        (90, 50, "medium-low"),
        (75, 45, "low"),
        (60, 40, "very-low"),
        (50, 35, "ultra-low"),
    ]

    stem = os.path.splitext(input_pdf)[0]
    final_output = f"{stem}_compressed.pdf"

    def level_path(level_name):
        """Return the intermediate output path used for one compression level."""
        return f"{stem}_compressed_{level_name}.pdf"

    # (path, size_mb) of every attempt completed in THIS run. Tracking them
    # explicitly avoids picking up stale files left by earlier runs.
    produced = []

    for dpi, quality, level_name in compression_levels:
        output_pdf = level_path(level_name)
        print("═" * 60)
        print(f"Attempting: {level_name} (dpi={dpi}, quality={quality})")
        print("═" * 60)

        try:
            compress_pdf_to_jpg(input_pdf, output_pdf, dpi, quality)
            compressed_size = get_file_size_mb(output_pdf)
        except Exception as e:
            # Top-level boundary: report the failure and try the next level
            print(f"✗ Error during compression: {e}\n")
            import traceback
            traceback.print_exc()
            # Do not leave a partial or outdated file for this level behind
            if os.path.exists(output_pdf):
                os.remove(output_pdf)
            continue

        compression_ratio = (1 - compressed_size / original_size) * 100
        print(f"\n✓ Compressed file size: {compressed_size:.2f} MB")
        print(f"✓ Compression ratio: {compression_ratio:.1f}%")

        if compressed_size > target_size_mb:
            produced.append((output_pdf, compressed_size))
            print(f"✗ Still above target ({compressed_size:.2f} MB > {target_size_mb} MB)")
            print("Trying lower quality...\n")
            continue

        # os.replace overwrites an existing final output in one step
        os.replace(output_pdf, final_output)
        print(f"\n{'=' * 60}")
        print(f"✓ SUCCESS! File is below {target_size_mb} MB")
        print(f"{'=' * 60}")
        print(f"Original: {original_size:.2f} MB")
        print(f"Compressed: {compressed_size:.2f} MB")
        print(f"Savings: {original_size - compressed_size:.2f} MB ({compression_ratio:.1f}%)")
        print(f"\nOutput saved as: {final_output}")

        # Clean up intermediate files of every level
        for _, _, other_level in compression_levels:
            leftover = level_path(other_level)
            if os.path.exists(leftover):
                os.remove(leftover)
        return 0

    # If we get here, show the best result
    print(f"\n{'=' * 60}")
    print("⚠ Could not achieve target size")
    print(f"{'=' * 60}")

    if not produced:
        print("No compressed files were created.")
        return 1

    # Keep the smallest file produced in this run
    best_path, best_size = min(produced, key=lambda item: item[1])
    print(f"Best result: {best_path}")
    print(f"Size: {best_size:.2f} MB (target was {target_size_mb} MB)")
    print(f"This is {best_size - target_size_mb:.2f} MB over the target.")

    os.replace(best_path, final_output)
    print(f"\nSaved as: {final_output}")

    # Clean up other compressed files
    for path, _ in produced:
        if path != best_path and os.path.exists(path):
            os.remove(path)
    return 0
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment