Skip to content

Instantly share code, notes, and snippets.

@Ghost---Shadow
Created November 3, 2025 06:47
Show Gist options
  • Select an option

  • Save Ghost---Shadow/97bb7a7de7cca7e968ec90cc104da97e to your computer and use it in GitHub Desktop.

Select an option

Save Ghost---Shadow/97bb7a7de7cca7e968ec90cc104da97e to your computer and use it in GitHub Desktop.

Revisions

  1. Ghost---Shadow created this gist Nov 3, 2025.
    202 changes: 202 additions & 0 deletions compress_pdf_through_jpg.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,202 @@
    #!/usr/bin/env python3
    """
    PDF Compression Script - Using PyMuPDF to convert pages to JPG and rebuild
    Installation:
    pip install PyMuPDF Pillow
    Usage:
    python compress_pdf_through_jpg.py
    """

    import os
    import sys
    import fitz # PyMuPDF
    from PIL import Image
    from io import BytesIO

    def compress_pdf_to_jpg(input_path, output_path, dpi=150, quality=75):
        """
        Rasterize every page of a PDF to JPEG and rebuild a new PDF from the images.

        Each source page is rendered at ``dpi``, encoded as a JPEG at ``quality``
        and placed on a new page with the same width and height as the original.
        The output pages contain only that image.

        Args:
            input_path: Input PDF file path
            output_path: Output PDF file path
            dpi: Resolution for rendering (lower = smaller file); must be > 0
            quality: JPEG quality (1-100), passed through to Pillow

        Raises:
            ValueError: If ``dpi`` is not a positive number.
        """
        if dpi <= 0:
            raise ValueError(f"dpi must be positive, got {dpi!r}")

        # PyMuPDF renders at 72 DPI by default, so the zoom factor is dpi / 72.
        # The matrix is the same for every page, so build it once.
        zoom = dpi / 72
        mat = fitz.Matrix(zoom, zoom)

        input_pdf = fitz.open(input_path)
        try:
            output_pdf = fitz.open()
            try:
                total_pages = len(input_pdf)
                print(f"Processing {total_pages} pages...")

                for page_num, page in enumerate(input_pdf, start=1):
                    print(f" Processing page {page_num}/{total_pages}", end='\r')

                    # Render straight to an opaque RGB pixmap. JPEG cannot store
                    # alpha, and requesting RGB without alpha lets the raw samples
                    # be handed to Pillow without a PNG encode/decode round trip.
                    pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB, alpha=False)
                    img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

                    # Compress to JPEG in memory
                    jpg_buffer = BytesIO()
                    img.save(jpg_buffer, format='JPEG', quality=quality, optimize=True)

                    # New page with the same dimensions as the original, filled
                    # entirely by the compressed image.
                    new_page = output_pdf.new_page(
                        width=page.rect.width,
                        height=page.rect.height,
                    )
                    new_page.insert_image(new_page.rect, stream=jpg_buffer.getvalue())

                print(f"\nProcessed {total_pages} pages")

                # Save the output PDF
                print("Saving compressed PDF...")
                output_pdf.save(output_path, garbage=4, deflate=True, clean=True)
            finally:
                # Close even when rendering or saving fails.
                output_pdf.close()
        finally:
            input_pdf.close()

        print(f"✓ PDF saved to: {output_path}")

    def get_file_size_mb(filepath):
        """Return the size of the file at ``filepath`` in units of 1024 * 1024 bytes."""
        bytes_per_mb = 1024 * 1024
        size_in_bytes = os.stat(filepath).st_size
        return size_in_bytes / bytes_per_mb

    def main(input_pdf="Deed of Lease@Dr. A. K. Nanda.pdf", target_size_mb=2.0):
        """
        Compress ``input_pdf`` at progressively lower quality until it fits the target.

        Each compression level writes ``<stem>_compressed_<level>.pdf`` next to the
        input. The first result at or below ``target_size_mb`` is moved to
        ``<stem>_compressed.pdf``. If no level reaches the target, the smallest
        result produced in this run is kept under that name instead. All remaining
        per-level files are removed.

        Args:
            input_pdf: Path of the PDF to compress.
            target_size_mb: Desired maximum output size in MB; must be > 0.

        Returns:
            0 if an output file was written (even if it is above the target),
            1 if the input is missing/empty, the target is invalid, or every
            compression level failed.
        """
        import traceback

        if not os.path.isfile(input_pdf):
            print(f"Error: Input file '{input_pdf}' not found!")
            return 1

        if target_size_mb <= 0:
            print(f"Error: Target size must be positive, got {target_size_mb}")
            return 1

        original_size = get_file_size_mb(input_pdf)
        if original_size == 0:
            # The size ratios below would divide by zero for an empty file.
            print(f"Error: Input file '{input_pdf}' is empty!")
            return 1

        print(f"Original file size: {original_size:.2f} MB")
        print(f"Target size: {target_size_mb} MB")
        print(f"Compression needed: {original_size / target_size_mb:.1f}x\n")

        # Try different compression levels
        # (dpi, quality, level_name)
        compression_levels = [
            (150, 80, "high"),
            (130, 70, "medium-high"),
            (110, 60, "medium"),
            (90, 50, "medium-low"),
            (75, 45, "low"),
            (60, 40, "very-low"),
            (50, 35, "ultra-low"),
        ]

        stem = os.path.splitext(input_pdf)[0]
        final_output = f"{stem}_compressed.pdf"
        level_paths = {
            level_name: f"{stem}_compressed_{level_name}.pdf"
            for _, _, level_name in compression_levels
        }

        def remove_intermediate_files():
            # Delete every per-level file still on disk, including partial files
            # left behind by a failed attempt.
            for path in level_paths.values():
                if os.path.exists(path):
                    os.remove(path)

        # (path, size_mb) for every level that completed in THIS run. Tracking
        # these avoids picking up stale files from earlier runs.
        attempts = []

        for dpi, quality, level_name in compression_levels:
            output_pdf = level_paths[level_name]

            print("═" * 60)
            print(f"Attempting: {level_name} (dpi={dpi}, quality={quality})")
            print("═" * 60)

            try:
                compress_pdf_to_jpg(input_pdf, output_pdf, dpi, quality)
                compressed_size = get_file_size_mb(output_pdf)
            except Exception as e:
                # Best effort: report the failure and move on to the next level.
                print(f"✗ Error during compression: {e}\n")
                traceback.print_exc()
                continue

            attempts.append((output_pdf, compressed_size))
            compression_ratio = (1 - compressed_size / original_size) * 100

            print(f"\n✓ Compressed file size: {compressed_size:.2f} MB")
            print(f"✓ Compression ratio: {compression_ratio:.1f}%")

            if compressed_size > target_size_mb:
                print(f"✗ Still above target ({compressed_size:.2f} MB > {target_size_mb} MB)")
                print("Trying lower quality...\n")
                continue

            # os.replace overwrites an existing destination in one step.
            os.replace(output_pdf, final_output)

            print(f"\n{'=' * 60}")
            print(f"✓ SUCCESS! File is below {target_size_mb} MB")
            print(f"{'=' * 60}")
            print(f"Original: {original_size:.2f} MB")
            print(f"Compressed: {compressed_size:.2f} MB")
            print(f"Savings: {original_size - compressed_size:.2f} MB ({compression_ratio:.1f}%)")
            print(f"\nOutput saved as: {final_output}")

            remove_intermediate_files()
            return 0

        # If we get here, no level reached the target: show the best result
        print(f"\n{'=' * 60}")
        print("⚠ Could not achieve target size")
        print(f"{'=' * 60}")

        if not attempts:
            print("No compressed files were created.")
            remove_intermediate_files()
            return 1

        best_path, best_size = min(attempts, key=lambda attempt: attempt[1])

        print(f"Best result: {best_path}")
        print(f"Size: {best_size:.2f} MB (target was {target_size_mb} MB)")
        print(f"This is {best_size - target_size_mb:.2f} MB over the target.")

        os.replace(best_path, final_output)
        print(f"\nSaved as: {final_output}")

        remove_intermediate_files()
        return 0

    # Script entry point: run main() and hand its return value to the shell
    # as the process exit status.
    if __name__ == "__main__":
        exit_code = main()
        sys.exit(exit_code)