Ghost---Shadow · November 3, 2025 06:47 · Nov 3, 2025
diff --git a/compress_pdf_through_jpg.py b/compress_pdf_through_jpg.py
@@ -0,0 +1,202 @@
+#!/usr/bin/env python3
+"""
+PDF Compression Script - Using PyMuPDF to convert pages to JPG and rebuild
+
+Installation:
+    pip install PyMuPDF Pillow
+
+Usage:
+    python compress_pdf_through_jpg.py
+"""
+
+import os
+import sys
+import fitz  # PyMuPDF
+from PIL import Image
+from io import BytesIO
+
+def compress_pdf_to_jpg(input_path, output_path, dpi=150, quality=75):
+    """
+    Convert each PDF page to JPEG image and create new PDF
+
+    Args:
+        input_path: Input PDF file path
+        output_path: Output PDF file path
+        dpi: Resolution for rendering (lower = smaller file)
+        quality: JPEG quality (1-100)
+    """
+    # Open the input PDF
+    input_pdf = fitz.open(input_path)
+    print(f"Processing {len(input_pdf)} pages...")
+
+    # Create a new PDF
+    output_pdf = fitz.open()
+
+    # Process each page
+    for page_num in range(len(input_pdf)):
+        print(f"  Processing page {page_num + 1}/{len(input_pdf)}", end='\r')
+
+        # Get the page
+        page = input_pdf[page_num]
+
+        # Calculate zoom factor based on desired DPI
+        # Default DPI in PyMuPDF is 72
+        zoom = dpi / 72
+
+        # Create a transformation matrix
+        mat = fitz.Matrix(zoom, zoom)
+
+        # Render page to pixmap (image)
+        pix = page.get_pixmap(matrix=mat)
+
+        # Convert pixmap to PIL Image
+        img_data = pix.tobytes("png")
+        img = Image.open(BytesIO(img_data))
+
+        # Convert to RGB if needed (remove alpha channel)
+        if img.mode in ('RGBA', 'LA', 'P'):
+            background = Image.new('RGB', img.size, (255, 255, 255))
+            if img.mode == 'P':
+                img = img.convert('RGBA')
+            if img.mode in ('RGBA', 'LA'):
+                background.paste(img, mask=img.split()[-1])
+                img = background
+            else:
+                img = img.convert('RGB')
+        elif img.mode != 'RGB':
+            img = img.convert('RGB')
+
+        # Compress to JPEG
+        jpg_buffer = BytesIO()
+        img.save(jpg_buffer, format='JPEG', quality=quality, optimize=True)
+        jpg_data = jpg_buffer.getvalue()
+
+        # Create a new page in output PDF with same dimensions as original
+        page_width = page.rect.width
+        page_height = page.rect.height
+        new_page = output_pdf.new_page(width=page_width, height=page_height)
+
+        # Insert the compressed JPEG image
+        img_rect = new_page.rect
+        new_page.insert_image(img_rect, stream=jpg_data)
+
+    print(f"\nProcessed {len(input_pdf)} pages")
+
+    # Save the output PDF
+    print(f"Saving compressed PDF...")
+    output_pdf.save(output_path, garbage=4, deflate=True, clean=True)
+    output_pdf.close()
+    input_pdf.close()
+
+    print(f"✓ PDF saved to: {output_path}")
+
+def get_file_size_mb(filepath):
+    """Get file size in MB"""
+    return os.path.getsize(filepath) / (1024 * 1024)
+
+def main():
+    input_pdf = "Deed of Lease@Dr. A. K. Nanda.pdf"
+    target_size_mb = 2.0
+
+    if not os.path.exists(input_pdf):
+        print(f"Error: Input file '{input_pdf}' not found!")
+        return 1
+
+    original_size = get_file_size_mb(input_pdf)
+    print(f"Original file size: {original_size:.2f} MB")
+    print(f"Target size: {target_size_mb} MB")
+    print(f"Compression needed: {original_size / target_size_mb:.1f}x\n")
+
+    # Try different compression levels
+    # (dpi, quality, level_name)
+    compression_levels = [
+        (150, 80, "high"),
+        (130, 70, "medium-high"),
+        (110, 60, "medium"),
+        (90, 50, "medium-low"),
+        (75, 45, "low"),
+        (60, 40, "very-low"),
+        (50, 35, "ultra-low"),
+    ]
+
+    for dpi, quality, level_name in compression_levels:
+        output_pdf = f"Deed of Lease@Dr. A. K. Nanda_compressed_{level_name}.pdf"
+
+        print(f"═" * 60)
+        print(f"Attempting: {level_name} (dpi={dpi}, quality={quality})")
+        print(f"═" * 60)
+
+        try:
+            compress_pdf_to_jpg(input_pdf, output_pdf, dpi, quality)
+
+            compressed_size = get_file_size_mb(output_pdf)
+            compression_ratio = (1 - compressed_size / original_size) * 100
+
+            print(f"\n✓ Compressed file size: {compressed_size:.2f} MB")
+            print(f"✓ Compression ratio: {compression_ratio:.1f}%")
+
+            if compressed_size <= target_size_mb:
+                # Rename to final output name
+                final_output = "Deed of Lease@Dr. A. K. Nanda_compressed.pdf"
+                if os.path.exists(final_output):
+                    os.remove(final_output)
+                os.rename(output_pdf, final_output)
+
+                print(f"\n{'=' * 60}")
+                print(f"✓ SUCCESS! File is below {target_size_mb} MB")
+                print(f"{'=' * 60}")
+                print(f"Original: {original_size:.2f} MB")
+                print(f"Compressed: {compressed_size:.2f} MB")
+                print(f"Savings: {original_size - compressed_size:.2f} MB ({compression_ratio:.1f}%)")
+                print(f"\nOutput saved as: {final_output}")
+
+                # Clean up intermediate files
+                for dpi2, quality2, level2 in compression_levels:
+                    temp_file = f"Deed of Lease@Dr. A. K. Nanda_compressed_{level2}.pdf"
+                    if temp_file != output_pdf and os.path.exists(temp_file):
+                        os.remove(temp_file)
+
+                return 0
+            else:
+                print(f"✗ Still above target ({compressed_size:.2f} MB > {target_size_mb} MB)")
+                print("Trying lower quality...\n")
+
+        except Exception as e:
+            print(f"✗ Error during compression: {e}\n")
+            import traceback
+            traceback.print_exc()
+
+    # If we get here, show the best result
+    print(f"\n{'=' * 60}")
+    print("⚠  Could not achieve target size")
+    print(f"{'=' * 60}")
+
+    # Find the smallest compressed file
+    compressed_files = [f for f in os.listdir('.') if f.startswith('Deed of Lease@Dr. A. K. Nanda_compressed_')]
+    if compressed_files:
+        sizes = [(f, get_file_size_mb(f)) for f in compressed_files]
+        smallest = min(sizes, key=lambda x: x[1])
+
+        print(f"Best result: {smallest[0]}")
+        print(f"Size: {smallest[1]:.2f} MB (target was {target_size_mb} MB)")
+        print(f"This is {smallest[1] - target_size_mb:.2f} MB over the target.")
+
+        # Rename best one to final output
+        final_output = "Deed of Lease@Dr. A. K. Nanda_compressed.pdf"
+        if os.path.exists(final_output):
+            os.remove(final_output)
+        os.rename(smallest[0], final_output)
+        print(f"\nSaved as: {final_output}")
+
+        # Clean up other compressed files
+        for f, _ in sizes:
+            if f != smallest[0] and os.path.exists(f):
+                os.remove(f)
+
+        return 0
+    else:
+        print("No compressed files were created.")
+        return 1
+
+if __name__ == "__main__":
+    sys.exit(main())
No results found