Created
November 3, 2025 06:47
-
-
Save Ghost---Shadow/97bb7a7de7cca7e968ec90cc104da97e to your computer and use it in GitHub Desktop.
Revisions
-
Ghost---Shadow created this gist
Nov 3, 2025 .There are no files selected for viewing
#!/usr/bin/env python3
"""
PDF Compression Script - Using PyMuPDF to convert pages to JPG and rebuild

Every page of the input PDF is rendered to a bitmap, re-encoded as a JPEG and
placed on a page of the same size in a new PDF, so the rebuilt pages contain
only the rendered image.  Progressively lower DPI/quality presets are tried
until the result fits under the target size.

Installation:
    pip install PyMuPDF Pillow

Usage:
    python compress_pdf_final.py [input.pdf] [--target-mb SIZE]

With no arguments the script processes DEFAULT_INPUT_PDF with a target of
DEFAULT_TARGET_SIZE_MB.
"""

import argparse
import os
import sys
import traceback
from io import BytesIO
from operator import itemgetter
from typing import Optional, Sequence

import fitz  # PyMuPDF
from PIL import Image

# Defaults used when the script is started without command-line arguments.
DEFAULT_INPUT_PDF = "Deed of Lease@Dr. A. K. Nanda.pdf"
DEFAULT_TARGET_SIZE_MB = 2.0

# (dpi, quality, level_name) presets, tried in order from best quality to
# smallest output.
COMPRESSION_LEVELS = (
    (150, 80, "high"),
    (130, 70, "medium-high"),
    (110, 60, "medium"),
    (90, 50, "medium-low"),
    (75, 45, "low"),
    (60, 40, "very-low"),
    (50, 35, "ultra-low"),
)

# PyMuPDF renders at 72 DPI when the zoom factor is 1.
_PDF_BASE_DPI = 72
_BYTES_PER_MB = 1024 * 1024
_RULE_WIDTH = 60


def _render_page_as_jpeg(page, matrix, quality):
    """Render *page* through *matrix* and return it as JPEG-encoded bytes."""
    # Request RGB without alpha explicitly so the raw samples can be handed
    # to Pillow directly, avoiding a lossless PNG encode/decode round trip.
    pix = page.get_pixmap(matrix=matrix, colorspace=fitz.csRGB, alpha=False)
    img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

    jpg_buffer = BytesIO()
    img.save(jpg_buffer, format='JPEG', quality=quality, optimize=True)
    return jpg_buffer.getvalue()


def compress_pdf_to_jpg(input_path, output_path, dpi=150, quality=75):
    """
    Convert each PDF page to JPEG image and create new PDF

    Args:
        input_path: Input PDF file path
        output_path: Output PDF file path
        dpi: Resolution for rendering (lower = smaller file); must be > 0
        quality: JPEG quality (1-100)

    Raises:
        ValueError: If dpi is not positive.

    Errors raised by PyMuPDF or Pillow while opening, rendering or saving are
    not caught here; both documents are closed before they propagate.
    """
    if dpi <= 0:
        raise ValueError(f"dpi must be positive, got {dpi}")

    # The zoom factor is the same for every page, so build the matrix once.
    zoom = dpi / _PDF_BASE_DPI
    matrix = fitz.Matrix(zoom, zoom)

    # Context managers guarantee both documents are closed even when a page
    # fails to render or the save fails.
    with fitz.open(input_path) as input_pdf, fitz.open() as output_pdf:
        page_count = len(input_pdf)
        print(f"Processing {page_count} pages...")

        for page_num, page in enumerate(input_pdf, start=1):
            print(f"  Processing page {page_num}/{page_count}", end='\r')

            jpg_data = _render_page_as_jpeg(page, matrix, quality)

            # New page has the same dimensions as the original; the JPEG is
            # stretched over the whole page.
            new_page = output_pdf.new_page(
                width=page.rect.width,
                height=page.rect.height,
            )
            new_page.insert_image(new_page.rect, stream=jpg_data)

        print(f"\nProcessed {page_count} pages")

        print("Saving compressed PDF...")
        output_pdf.save(output_path, garbage=4, deflate=True, clean=True)

    print(f"✓ PDF saved to: {output_path}")


def get_file_size_mb(filepath):
    """Get file size in MB (1 MB = 1024 * 1024 bytes)"""
    return os.path.getsize(filepath) / _BYTES_PER_MB


def _remove_if_exists(path):
    """Delete *path*, ignoring the case where it does not exist."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def _positive_float(text):
    """argparse type: parse *text* as a float strictly greater than zero."""
    value = float(text)
    if not value > 0:  # written this way so NaN is rejected as well
        raise argparse.ArgumentTypeError(f"must be greater than 0, got {text}")
    return value


def _parse_args(argv):
    """Parse the command line; every argument is optional."""
    parser = argparse.ArgumentParser(
        description="Compress a PDF by re-encoding every page as a JPEG image.",
    )
    parser.add_argument(
        "input_pdf",
        nargs="?",
        default=DEFAULT_INPUT_PDF,
        help="PDF file to compress (default: %(default)s)",
    )
    parser.add_argument(
        "--target-mb",
        type=_positive_float,
        default=DEFAULT_TARGET_SIZE_MB,
        help="stop at the first result not larger than this many MB "
             "(default: %(default)s)",
    )
    return parser.parse_args(argv)


def main(argv: Optional[Sequence[str]] = None) -> int:
    """
    Compress a PDF, trying each preset until the target size is reached.

    Output files are written next to the input: one intermediate
    "<name>_compressed_<level>.pdf" per attempt, and the final result as
    "<name>_compressed.pdf".  If no preset reaches the target, the smallest
    file produced is kept as the final result.

    Args:
        argv: Command-line arguments without the program name; None means
            use sys.argv[1:].

    Returns:
        0 if a compressed file was saved (even if it is over the target),
        1 if the input is missing or empty, or if no attempt succeeded.
    """
    args = _parse_args(argv)
    input_pdf = args.input_pdf
    target_size_mb = args.target_mb

    if not os.path.exists(input_pdf):
        print(f"Error: Input file '{input_pdf}' not found!")
        return 1

    original_size = get_file_size_mb(input_pdf)
    if original_size == 0:
        # Also protects the compression-ratio division further down.
        print(f"Error: Input file '{input_pdf}' is empty!")
        return 1

    print(f"Original file size: {original_size:.2f} MB")
    print(f"Target size: {target_size_mb} MB")
    print(f"Compression needed: {original_size / target_size_mb:.1f}x\n")

    base_name = os.path.splitext(input_pdf)[0]
    final_output = f"{base_name}_compressed.pdf"

    # (path, size_mb) of every file this run produced successfully.  Tracking
    # them explicitly avoids picking up stale or partially written files.
    attempts = []

    for dpi, quality, level_name in COMPRESSION_LEVELS:
        output_pdf = f"{base_name}_compressed_{level_name}.pdf"

        print("═" * _RULE_WIDTH)
        print(f"Attempting: {level_name} (dpi={dpi}, quality={quality})")
        print("═" * _RULE_WIDTH)

        try:
            compress_pdf_to_jpg(input_pdf, output_pdf, dpi, quality)
            compressed_size = get_file_size_mb(output_pdf)
        except Exception as e:
            # Best effort: report the failure and move on to the next preset.
            print(f"✗ Error during compression: {e}\n")
            traceback.print_exc()
            _remove_if_exists(output_pdf)  # do not leave a partial file behind
            continue

        attempts.append((output_pdf, compressed_size))
        compression_ratio = (1 - compressed_size / original_size) * 100

        print(f"\n✓ Compressed file size: {compressed_size:.2f} MB")
        print(f"✓ Compression ratio: {compression_ratio:.1f}%")

        if compressed_size <= target_size_mb:
            # os.replace overwrites an existing final file in one step.
            os.replace(output_pdf, final_output)

            print(f"\n{'=' * _RULE_WIDTH}")
            print(f"✓ SUCCESS! File is below {target_size_mb} MB")
            print(f"{'=' * _RULE_WIDTH}")
            print(f"Original: {original_size:.2f} MB")
            print(f"Compressed: {compressed_size:.2f} MB")
            print(f"Savings: {original_size - compressed_size:.2f} MB ({compression_ratio:.1f}%)")
            print(f"\nOutput saved as: {final_output}")

            # Clean up intermediate files from the earlier, larger attempts.
            for path, _ in attempts:
                if path != output_pdf:
                    _remove_if_exists(path)

            return 0

        print(f"✗ Still above target ({compressed_size:.2f} MB > {target_size_mb} MB)")
        print("Trying lower quality...\n")

    # No preset reached the target: fall back to the smallest result.
    print(f"\n{'=' * _RULE_WIDTH}")
    print("⚠ Could not achieve target size")
    print(f"{'=' * _RULE_WIDTH}")

    if not attempts:
        print("No compressed files were created.")
        return 1

    best_path, best_size = min(attempts, key=itemgetter(1))
    print(f"Best result: {best_path}")
    print(f"Size: {best_size:.2f} MB (target was {target_size_mb} MB)")
    print(f"This is {best_size - target_size_mb:.2f} MB over the target.")

    os.replace(best_path, final_output)
    print(f"\nSaved as: {final_output}")

    # Clean up the other compressed files.
    for path, _ in attempts:
        if path != best_path:
            _remove_if_exists(path)

    return 0


if __name__ == "__main__":
    sys.exit(main())