"""Generate a directory of synthetic .eml files with varying body sizes.

Run as a script to write EMAIL_COUNT messages into OUTPUT_DIR, cycling
through the body sizes listed in SIZES. Importing the module has no side
effects; call main() or generate_email() explicitly.
"""

import os
import random
from datetime import datetime, timedelta, timezone
from email.message import EmailMessage
from email.utils import format_datetime
from typing import Optional

# Configuration
OUTPUT_DIR = "eml_dump_varied"
EMAIL_COUNT = 500  # Total emails to generate
SIZES = [1024, 1024 * 1024, 10 * 1024 * 1024]  # 1KB, 1MB, 10MB
DOMAINS = ["example.com", "testmail.org", "mailbox.net"]
SUBJECTS = ["Test email", "Reminder", "Status update", "Hello", "Report"]

# Filler line repeated to build message bodies.
_BODY_LINE = "This is a test line.\n"


def generate_body(size_bytes: int) -> str:
    """Return filler text made of whole copies of the test line.

    The result is the largest whole number of lines that fits in
    ``size_bytes``, so it may be up to ``len(line) - 1`` characters shorter
    than requested (and is empty when ``size_bytes`` is smaller than one line).

    Raises:
        ValueError: if ``size_bytes`` is negative.
    """
    if size_bytes < 0:
        raise ValueError(f"size_bytes must be non-negative, got {size_bytes}")
    repeat_count = size_bytes // len(_BODY_LINE)
    return _BODY_LINE * repeat_count


def generate_email(index: int, size_bytes: int, output_dir: Optional[str] = None) -> None:
    """Build one randomized message and write it as an .eml file.

    Args:
        index: Sequence number used in the file name (zero-padded to 4 digits).
        size_bytes: Approximate body size; also embedded in the subject and,
            converted to KB, in the file name.
        output_dir: Directory to write into. Defaults to the module-level
            OUTPUT_DIR (looked up at call time). Created if missing.
    """
    target_dir = OUTPUT_DIR if output_dir is None else output_dir
    # Create the directory here so the function works on its own, not only
    # when driven by main().
    os.makedirs(target_dir, exist_ok=True)

    msg = EmailMessage()
    msg["From"] = f"user{random.randint(1, 100)}@{random.choice(DOMAINS)}"
    msg["To"] = f"user{random.randint(101, 200)}@{random.choice(DOMAINS)}"
    msg["Subject"] = f"{random.choice(SUBJECTS)} (size: {size_bytes} bytes)"

    # Use an aware UTC timestamp so the "+0000" offset in the header is true,
    # and format_datetime so day/month names are RFC 5322 English regardless
    # of the process locale (strftime's %a/%b are locale-dependent).
    sent_at = datetime.now(timezone.utc) - timedelta(days=random.randint(0, 365))
    msg["Date"] = format_datetime(sent_at)

    msg.set_content(generate_body(size_bytes))

    filename = os.path.join(target_dir, f"email_{index:04d}_{size_bytes // 1024}KB.eml")
    with open(filename, "wb") as f:
        f.write(bytes(msg))


def main() -> None:
    """Generate EMAIL_COUNT emails into OUTPUT_DIR, cycling through SIZES."""
    # Ensure the directory exists even if EMAIL_COUNT is 0.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Generate emails cycling through sizes
    for i in range(EMAIL_COUNT):
        size = SIZES[i % len(SIZES)]
        generate_email(i + 1, size)

    print(f"{EMAIL_COUNT} .eml files generated in '{OUTPUT_DIR}/' with varying body sizes.")


if __name__ == "__main__":
    main()