#!/usr/bin/env python3
"""Generate a large nginx-style access log using asyncio plus a thread pool."""
import asyncio
import random
import datetime
import time
import os
from concurrent.futures import ThreadPoolExecutor

# Configuration
BATCH_SIZE = 10000                    # log lines generated per batch
CONCURRENT_BATCHES = 10               # batch-generation tasks kept in flight
WRITE_BUFFER_SIZE = 50 * 1024 * 1024  # 50MB write buffer
TARGET_SIZE_GB = 2
NUM_THREADS = 8

# Pre-generate data pools so the hot loop only does random.choice()
IPS = [
    f"{random.randint(1, 255)}.{random.randint(0, 255)}."
    f"{random.randint(0, 255)}.{random.randint(1, 255)}"
    for _ in range(1000)
]

METHODS = ["GET", "POST", "PUT", "DELETE", "HEAD", "OPTIONS"]

PATHS = [
    "/", "/api/users", "/api/products", "/api/orders", "/api/auth",
    "/login", "/logout", "/dashboard", "/profile", "/settings",
    "/static/css/main.css", "/static/js/app.js", "/static/js/vendor.js",
    "/images/logo.png", "/images/banner.jpg", "/favicon.ico",
    "/api/v1/data", "/api/v2/users", "/health", "/metrics",
    "/admin/dashboard", "/admin/users", "/admin/settings",
    "/products/1234", "/products/5678", "/cart", "/checkout",
]

STATUS_CODES = [200, 201, 204, 301, 302, 304, 400, 401, 403, 404, 500, 502, 503]

USER_AGENTS = [
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/91.0.4472.124",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) Safari/605.1.15",
    "curl/7.68.0",
    "Python/3.9 aiohttp/3.7.4",
    "PostmanRuntime/7.28.4",
]

REFERERS = ["-", "https://example.com", "https://google.com", "https://github.com"]


def generate_log_lines(count: int) -> str:
    """Generate multiple log lines as a single string."""
    lines = []
    for _ in range(count):
        ip = random.choice(IPS)
        # Spread timestamps over the last 30 days. The format string claims
        # +0000, so generate in UTC rather than local time.
        timestamp = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(
            days=random.randint(0, 30),
            hours=random.randint(0, 23),
            minutes=random.randint(0, 59),
        )
        timestamp_str = timestamp.strftime("%d/%b/%Y:%H:%M:%S +0000")
        method = random.choice(METHODS)
        path = random.choice(PATHS)
        status = random.choice(STATUS_CODES)
        size = random.randint(100, 50000)
        referer = random.choice(REFERERS)
        user_agent = random.choice(USER_AGENTS)
        response_time = round(random.uniform(0.001, 5.0), 3)
        lines.append(
            f'{ip} - - [{timestamp_str}] "{method} {path} HTTP/1.1" {status} '
            f'{size} "{referer}" "{user_agent}" {response_time}\n'
        )
    return "".join(lines)


async def generate_batch(executor: ThreadPoolExecutor, batch_size: int) -> str:
    """Generate a batch of log lines on the thread pool."""
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(executor, generate_log_lines, batch_size)


async def write_to_file(file_handle, data: str, stats: dict):
    """Write data to the file on the default executor and update counters."""
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, file_handle.write, data)
    # Byte accounting assumes the file is opened with UTF-8 encoding (see below).
    stats["bytes_written"] += len(data.encode("utf-8"))
    stats["lines_written"] += data.count("\n")


async def generate_nginx_log(
    filename: str = "nginx_sample.log", target_size_gb: float = 2
):
    """Main async function to generate the log file."""
    target_size = int(target_size_gb * 1024 * 1024 * 1024)
    stats = {"bytes_written": 0, "lines_written": 0}

    print(f"Generating {target_size_gb}GB nginx log file...")
    print(
        f"Using {CONCURRENT_BATCHES} concurrent generators, {NUM_THREADS} threads"
    )

    start_time = time.time()
    last_report_time = start_time
    last_bytes = 0

    with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
        # Explicit UTF-8 so the bytes_written counter matches what lands on disk.
        with open(filename, "w", buffering=WRITE_BUFFER_SIZE, encoding="utf-8") as f:
            pending_tasks = []
            while stats["bytes_written"] < target_size:
                # Keep the queue filled with concurrent batch generations
                while (
                    len(pending_tasks) < CONCURRENT_BATCHES
                    and stats["bytes_written"] < target_size
                ):
                    task = asyncio.create_task(
                        generate_batch(executor, BATCH_SIZE)
                    )
                    pending_tasks.append(task)

                # Wait for at least one batch to complete
                done, pending = await asyncio.wait(
                    pending_tasks, return_when=asyncio.FIRST_COMPLETED
                )
                pending_tasks = list(pending)

                # Write completed batches
                for task in done:
                    batch_data = await task
                    await write_to_file(f, batch_data, stats)

                # Progress reporting, throttled to twice per second
                current_time = time.time()
                if current_time - last_report_time >= 0.5:
                    elapsed = current_time - start_time
                    speed = (stats["bytes_written"] - last_bytes) / (
                        (current_time - last_report_time) * 1024 * 1024
                    )
                    avg_speed = stats["bytes_written"] / (
                        elapsed * 1024 * 1024
                    )
                    progress = (stats["bytes_written"] / target_size) * 100
                    gb_written = stats["bytes_written"] / (1024**3)
                    print(
                        f"\rProgress: {progress:.1f}% ({gb_written:.2f}GB) | "
                        f"Spd: {speed:.0f} MB/s | Avg: {avg_speed:.0f} MB/s | "
                        f"Lines: {stats['lines_written']:,}",
                        end="",
                        flush=True,
                    )
                    last_report_time = current_time
                    last_bytes = stats["bytes_written"]

            # Process any tasks still in flight when the target was reached
            if pending_tasks:
                remaining = await asyncio.gather(*pending_tasks)
                for batch_data in remaining:
                    if stats["bytes_written"] < target_size:
                        await write_to_file(f, batch_data, stats)

    # The file is closed here, so getsize() reflects the fully flushed output.
    elapsed = time.time() - start_time
    final_size = os.path.getsize(filename)
    print(f"\n✓ Completed in {elapsed:.2f} seconds")
    print(f"Final file size: {final_size / (1024**3):.2f}GB")
    print(f"Average speed: {(final_size / (1024**2)) / elapsed:.0f} MB/s")
    print(f"Total lines: {stats['lines_written']:,}")


async def main():
    await generate_nginx_log("nginx_sample.log", TARGET_SIZE_GB)


if __name__ == "__main__":
    asyncio.run(main())
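
# ------------------------------------------------------------------------------
# Usage (the script filename below is illustrative; save it under any name):
#
#   $ python3 generate_nginx_log.py
#
# Each generated line follows nginx's "combined" log format with one extra
# trailing response-time field. A sample line (values are illustrative):
#
#   203.0.113.7 - - [12/Jan/2024:14:03:21 +0000] "GET /api/users HTTP/1.1" 200 4821 "-" "curl/7.68.0" 0.042
#
# A minimal sketch for parsing the output back, assuming the exact format
# written above (the regex and the LINE_RE name are not part of the generator):
#
#   import re
#   LINE_RE = re.compile(
#       r'(?P<ip>\S+) - - \[(?P<ts>[^\]]+)\] "(?P<method>\S+) (?P<path>\S+) HTTP/1.1" '
#       r'(?P<status>\d+) (?P<size>\d+) "(?P<referer>[^"]*)" "(?P<ua>[^"]*)" (?P<rt>[\d.]+)'
#   )
#   with open("nginx_sample.log", encoding="utf-8") as fh:
#       print(LINE_RE.match(next(fh)).groupdict())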