Ghost---Shadow · November 3, 2025 06:47
diff --git a/compress_pdf_through_jpg.py b/compress_pdf_through_jpg.py
 #!/usr/bin/env python3
 """
 PDF Compression Script - Using PyMuPDF to convert pages to JPG and rebuild

 Installation:
    pip install PyMuPDF Pillow

 Usage:
    python compress_pdf_through_jpg.py
 """

 import os
 import sys
 import fitz  # PyMuPDF
 from PIL import Image
 from io import BytesIO

 def compress_pdf_to_jpg(input_path, output_path, dpi=150, quality=75):
    """
    Convert each PDF page to a JPEG image and build a new PDF from them.

    Every page is rendered at ``dpi``, flattened onto a white background
    when the rendered image has an alpha channel, re-encoded as JPEG, and
    inserted into a new page with the same dimensions as the original.
    The output pages therefore contain images only.

    Args:
        input_path: Input PDF file path
        output_path: Output PDF file path
        dpi: Resolution for rendering (lower = smaller file); must be > 0
        quality: JPEG quality passed to Pillow (1-100)

    Raises:
        ValueError: If ``dpi`` is not positive.
    """
    # Fail early with a clear message instead of a rendering error later on.
    if dpi <= 0:
        raise ValueError(f"dpi must be a positive number, got {dpi!r}")

    # Default DPI in PyMuPDF is 72. The scale is identical for every page,
    # so the transformation matrix is built once, outside the loop.
    zoom = dpi / 72
    mat = fitz.Matrix(zoom, zoom)

    # The context managers close both documents even if rendering or saving
    # raises, so no file handles are leaked on failure.
    with fitz.open(input_path) as input_pdf, fitz.open() as output_pdf:
        total_pages = len(input_pdf)
        print(f"Processing {total_pages} pages...")

        for page_num, page in enumerate(input_pdf, start=1):
            print(f"  Processing page {page_num}/{total_pages}", end='\r')

            # Render page to pixmap and hand it to Pillow as PNG bytes
            pix = page.get_pixmap(matrix=mat)
            img = Image.open(BytesIO(pix.tobytes("png")))

            # JPEG cannot store transparency: composite images that carry an
            # alpha channel onto white, convert anything else to RGB.
            if img.mode == 'P':
                img = img.convert('RGBA')
            if img.mode in ('RGBA', 'LA'):
                background = Image.new('RGB', img.size, (255, 255, 255))
                background.paste(img, mask=img.split()[-1])
                img = background
            elif img.mode != 'RGB':
                img = img.convert('RGB')

            # Compress to JPEG
            jpg_buffer = BytesIO()
            img.save(jpg_buffer, format='JPEG', quality=quality, optimize=True)

            # New page with the same dimensions as the original, filled
            # edge to edge with the compressed image
            new_page = output_pdf.new_page(
                width=page.rect.width, height=page.rect.height
            )
            new_page.insert_image(new_page.rect, stream=jpg_buffer.getvalue())

        print(f"\nProcessed {total_pages} pages")

        # Save the output PDF
        print("Saving compressed PDF...")
        output_pdf.save(output_path, garbage=4, deflate=True, clean=True)

    print(f"✓ PDF saved to: {output_path}")

 def get_file_size_mb(filepath):
    """Return the size of the file at *filepath* in MB (1 MB = 1024 * 1024 bytes)."""
    bytes_per_mb = 1024 * 1024
    size_in_bytes = os.path.getsize(filepath)
    return size_in_bytes / bytes_per_mb

 def main(input_pdf="Deed of Lease@Dr. A. K. Nanda.pdf", target_size_mb=2.0):
    """
    Compress a PDF under a target size by stepping down DPI and JPEG quality.

    Compression levels are tried from highest to lowest quality. The first
    result at or below ``target_size_mb`` is saved as
    ``<input name without extension>_compressed.pdf``. If no level reaches
    the target, the smallest file produced during this run is saved under
    that name instead.

    Args:
        input_pdf: Path of the PDF to compress. Defaults to the file this
            script was originally written for.
        target_size_mb: Desired maximum output size in MB; must be > 0.

    Returns:
        0 when a compressed file was saved (even if it is still above the
        target), 1 when the input is missing, the target is not positive,
        or no compression attempt produced a file.
    """
    import traceback

    if not os.path.exists(input_pdf):
        print(f"Error: Input file '{input_pdf}' not found!")
        return 1
    if target_size_mb <= 0:
        print(f"Error: Target size must be positive, got {target_size_mb}")
        return 1

    # Every output name is derived from the input name, so results land next
    # to the input instead of depending on the current working directory.
    stem = os.path.splitext(input_pdf)[0]
    final_output = f"{stem}_compressed.pdf"

    original_size = get_file_size_mb(input_pdf)
    print(f"Original file size: {original_size:.2f} MB")
    print(f"Target size: {target_size_mb} MB")
    print(f"Compression needed: {original_size / target_size_mb:.1f}x\n")

    # Try different compression levels
    # (dpi, quality, level_name)
    compression_levels = [
        (150, 80, "high"),
        (130, 70, "medium-high"),
        (110, 60, "medium"),
        (90, 50, "medium-low"),
        (75, 45, "low"),
        (60, 40, "very-low"),
        (50, 35, "ultra-low"),
    ]
    level_files = [
        f"{stem}_compressed_{level_name}.pdf"
        for _, _, level_name in compression_levels
    ]

    # (path, size_mb) of every attempt from THIS run that is still above the
    # target. Tracking them explicitly avoids scanning the directory, which
    # could pick up (and later delete) unrelated files sharing the prefix.
    produced = []

    for (dpi, quality, level_name), output_pdf in zip(compression_levels, level_files):
        print("═" * 60)
        print(f"Attempting: {level_name} (dpi={dpi}, quality={quality})")
        print("═" * 60)

        # Only the compression itself is best-effort: a failure at one level
        # should not prevent trying the next one.
        try:
            compress_pdf_to_jpg(input_pdf, output_pdf, dpi, quality)
            compressed_size = get_file_size_mb(output_pdf)
        except Exception as e:
            print(f"✗ Error during compression: {e}\n")
            traceback.print_exc()
            continue

        compression_ratio = (1 - compressed_size / original_size) * 100

        print(f"\n✓ Compressed file size: {compressed_size:.2f} MB")
        print(f"✓ Compression ratio: {compression_ratio:.1f}%")

        if compressed_size > target_size_mb:
            produced.append((output_pdf, compressed_size))
            print(f"✗ Still above target ({compressed_size:.2f} MB > {target_size_mb} MB)")
            print("Trying lower quality...\n")
            continue

        # os.replace overwrites an existing final file in a single step.
        os.replace(output_pdf, final_output)

        print(f"\n{'=' * 60}")
        print(f"✓ SUCCESS! File is below {target_size_mb} MB")
        print(f"{'=' * 60}")
        print(f"Original: {original_size:.2f} MB")
        print(f"Compressed: {compressed_size:.2f} MB")
        print(f"Savings: {original_size - compressed_size:.2f} MB ({compression_ratio:.1f}%)")
        print(f"\nOutput saved as: {final_output}")

        # Clean up intermediate files (the winning one was already renamed)
        for temp_file in level_files:
            if os.path.exists(temp_file):
                os.remove(temp_file)

        return 0

    # If we get here, no level reached the target: show the best result
    print(f"\n{'=' * 60}")
    print("⚠  Could not achieve target size")
    print(f"{'=' * 60}")

    if not produced:
        print("No compressed files were created.")
        return 1

    best_file, best_size = min(produced, key=lambda item: item[1])

    print(f"Best result: {best_file}")
    print(f"Size: {best_size:.2f} MB (target was {target_size_mb} MB)")
    print(f"This is {best_size - target_size_mb:.2f} MB over the target.")

    # Promote the smallest attempt to the final output name
    os.replace(best_file, final_output)
    print(f"\nSaved as: {final_output}")

    # Clean up the other attempts from this run
    for leftover, _ in produced:
        if leftover != best_file and os.path.exists(leftover):
            os.remove(leftover)

    return 0

 if __name__ == "__main__":
    # Run the script and hand main()'s status code to the shell.
    exit_code = main()
    sys.exit(exit_code)
	#!/usr/bin/env python3
	"""
	PDF Compression Script - Using PyMuPDF to convert pages to JPG and rebuild

	Installation:
	pip install PyMuPDF Pillow

	Usage:
	python compress_pdf_through_jpg.py
	"""

	import os
	import sys
	import fitz # PyMuPDF
	from PIL import Image
	from io import BytesIO

	def compress_pdf_to_jpg(input_path, output_path, dpi=150, quality=75):
	    """
	    Convert each PDF page to a JPEG image and build a new PDF from them.

	    Every page is rendered at ``dpi``, flattened onto a white background
	    when the rendered image has an alpha channel, re-encoded as JPEG, and
	    inserted into a new page with the same dimensions as the original.
	    The output pages therefore contain images only.

	    Args:
	        input_path: Input PDF file path
	        output_path: Output PDF file path
	        dpi: Resolution for rendering (lower = smaller file); must be > 0
	        quality: JPEG quality passed to Pillow (1-100)

	    Raises:
	        ValueError: If ``dpi`` is not positive.
	    """
	    # Fail early with a clear message instead of a rendering error later on.
	    if dpi <= 0:
	        raise ValueError(f"dpi must be a positive number, got {dpi!r}")

	    # Default DPI in PyMuPDF is 72. The scale is identical for every page,
	    # so the transformation matrix is built once, outside the loop.
	    zoom = dpi / 72
	    mat = fitz.Matrix(zoom, zoom)

	    # The context managers close both documents even if rendering or saving
	    # raises, so no file handles are leaked on failure.
	    with fitz.open(input_path) as input_pdf, fitz.open() as output_pdf:
	        total_pages = len(input_pdf)
	        print(f"Processing {total_pages} pages...")

	        for page_num, page in enumerate(input_pdf, start=1):
	            print(f" Processing page {page_num}/{total_pages}", end='\r')

	            # Render page to pixmap and hand it to Pillow as PNG bytes
	            pix = page.get_pixmap(matrix=mat)
	            img = Image.open(BytesIO(pix.tobytes("png")))

	            # JPEG cannot store transparency: composite images that carry an
	            # alpha channel onto white, convert anything else to RGB.
	            if img.mode == 'P':
	                img = img.convert('RGBA')
	            if img.mode in ('RGBA', 'LA'):
	                background = Image.new('RGB', img.size, (255, 255, 255))
	                background.paste(img, mask=img.split()[-1])
	                img = background
	            elif img.mode != 'RGB':
	                img = img.convert('RGB')

	            # Compress to JPEG
	            jpg_buffer = BytesIO()
	            img.save(jpg_buffer, format='JPEG', quality=quality, optimize=True)

	            # New page with the same dimensions as the original, filled
	            # edge to edge with the compressed image
	            new_page = output_pdf.new_page(
	                width=page.rect.width, height=page.rect.height
	            )
	            new_page.insert_image(new_page.rect, stream=jpg_buffer.getvalue())

	        print(f"\nProcessed {total_pages} pages")

	        # Save the output PDF
	        print("Saving compressed PDF...")
	        output_pdf.save(output_path, garbage=4, deflate=True, clean=True)

	    print(f"✓ PDF saved to: {output_path}")

	def get_file_size_mb(filepath):
	    """Get file size in MB (1 MB = 1024 * 1024 bytes)."""
	    return os.path.getsize(filepath) / (1024 * 1024)

	def main(input_pdf="Deed of Lease@Dr. A. K. Nanda.pdf", target_size_mb=2.0):
	    """
	    Compress a PDF under a target size by stepping down DPI and JPEG quality.

	    Compression levels are tried from highest to lowest quality. The first
	    result at or below ``target_size_mb`` is saved as
	    ``<input name without extension>_compressed.pdf``. If no level reaches
	    the target, the smallest file produced during this run is saved under
	    that name instead.

	    Args:
	        input_pdf: Path of the PDF to compress. Defaults to the file this
	            script was originally written for.
	        target_size_mb: Desired maximum output size in MB; must be > 0.

	    Returns:
	        0 when a compressed file was saved (even if it is still above the
	        target), 1 when the input is missing, the target is not positive,
	        or no compression attempt produced a file.
	    """
	    import traceback

	    if not os.path.exists(input_pdf):
	        print(f"Error: Input file '{input_pdf}' not found!")
	        return 1
	    if target_size_mb <= 0:
	        print(f"Error: Target size must be positive, got {target_size_mb}")
	        return 1

	    # Every output name is derived from the input name, so results land next
	    # to the input instead of depending on the current working directory.
	    stem = os.path.splitext(input_pdf)[0]
	    final_output = f"{stem}_compressed.pdf"

	    original_size = get_file_size_mb(input_pdf)
	    print(f"Original file size: {original_size:.2f} MB")
	    print(f"Target size: {target_size_mb} MB")
	    print(f"Compression needed: {original_size / target_size_mb:.1f}x\n")

	    # Try different compression levels
	    # (dpi, quality, level_name)
	    compression_levels = [
	        (150, 80, "high"),
	        (130, 70, "medium-high"),
	        (110, 60, "medium"),
	        (90, 50, "medium-low"),
	        (75, 45, "low"),
	        (60, 40, "very-low"),
	        (50, 35, "ultra-low"),
	    ]
	    level_files = [
	        f"{stem}_compressed_{level_name}.pdf"
	        for _, _, level_name in compression_levels
	    ]

	    # (path, size_mb) of every attempt from THIS run that is still above the
	    # target. Tracking them explicitly avoids scanning the directory, which
	    # could pick up (and later delete) unrelated files sharing the prefix.
	    produced = []

	    for (dpi, quality, level_name), output_pdf in zip(compression_levels, level_files):
	        print("═" * 60)
	        print(f"Attempting: {level_name} (dpi={dpi}, quality={quality})")
	        print("═" * 60)

	        # Only the compression itself is best-effort: a failure at one level
	        # should not prevent trying the next one.
	        try:
	            compress_pdf_to_jpg(input_pdf, output_pdf, dpi, quality)
	            compressed_size = get_file_size_mb(output_pdf)
	        except Exception as e:
	            print(f"✗ Error during compression: {e}\n")
	            traceback.print_exc()
	            continue

	        compression_ratio = (1 - compressed_size / original_size) * 100

	        print(f"\n✓ Compressed file size: {compressed_size:.2f} MB")
	        print(f"✓ Compression ratio: {compression_ratio:.1f}%")

	        if compressed_size > target_size_mb:
	            produced.append((output_pdf, compressed_size))
	            print(f"✗ Still above target ({compressed_size:.2f} MB > {target_size_mb} MB)")
	            print("Trying lower quality...\n")
	            continue

	        # os.replace overwrites an existing final file in a single step.
	        os.replace(output_pdf, final_output)

	        print(f"\n{'=' * 60}")
	        print(f"✓ SUCCESS! File is below {target_size_mb} MB")
	        print(f"{'=' * 60}")
	        print(f"Original: {original_size:.2f} MB")
	        print(f"Compressed: {compressed_size:.2f} MB")
	        print(f"Savings: {original_size - compressed_size:.2f} MB ({compression_ratio:.1f}%)")
	        print(f"\nOutput saved as: {final_output}")

	        # Clean up intermediate files (the winning one was already renamed)
	        for temp_file in level_files:
	            if os.path.exists(temp_file):
	                os.remove(temp_file)

	        return 0

	    # If we get here, no level reached the target: show the best result
	    print(f"\n{'=' * 60}")
	    print("⚠ Could not achieve target size")
	    print(f"{'=' * 60}")

	    if not produced:
	        print("No compressed files were created.")
	        return 1

	    best_file, best_size = min(produced, key=lambda item: item[1])

	    print(f"Best result: {best_file}")
	    print(f"Size: {best_size:.2f} MB (target was {target_size_mb} MB)")
	    print(f"This is {best_size - target_size_mb:.2f} MB over the target.")

	    # Promote the smallest attempt to the final output name
	    os.replace(best_file, final_output)
	    print(f"\nSaved as: {final_output}")

	    # Clean up the other attempts from this run
	    for leftover, _ in produced:
	        if leftover != best_file and os.path.exists(leftover):
	            os.remove(leftover)

	    return 0

	if __name__ == "__main__":
	    # Run the script and hand main()'s status code to the shell.
	    sys.exit(main())
No results found