import io
import os
import time
from pathlib import Path

import diffusers
import modal
import torch

stub = modal.Stub("local-model-mount-test")

model_id = "runwayml/stable-diffusion-v1-5"
hf_token = os.getenv("HUGGINGFACE_TOKEN")
local_path = "/tmp/hf-model"

# Conda-based container image with CUDA-enabled PyTorch, xformers, and diffusers.
image = (
    modal.Image.conda()
    .run_commands(
        [
            "conda install xformers -c xformers/label/dev",
            "conda install pytorch torchvision pytorch-cuda=11.7 -c pytorch -c nvidia",
        ]
    )
    .run_commands(["pip install diffusers[torch] transformers ftfy accelerate"])
)
stub.image = image


def download_model():
    # Runs locally: fetch the scheduler and pipeline from the Hugging Face Hub
    # and save them under local_path so they can be mounted into the container.
    euler = diffusers.EulerAncestralDiscreteScheduler.from_pretrained(
        model_id, subfolder="scheduler", use_auth_token=hf_token, cache_dir=local_path
    )
    euler.save_pretrained(local_path)
    pipe = diffusers.StableDiffusionPipeline.from_pretrained(
        model_id,
        use_auth_token=hf_token,
        revision="fp16",
        torch_dtype=torch.float16,
        cache_dir=local_path,
    )
    # save_pretrained() writes each pipeline component (unet, vae, scheduler, ...)
    # into its own subfolder under local_path.
    pipe.save_pretrained(local_path)


class StableDiffusion:
    def __enter__(self):
        # Runs once per container start, so the pipeline is loaded a single
        # time and then reused across calls to run_inference.
        import diffusers
        import torch

        torch.backends.cudnn.benchmark = True
        torch.backends.cuda.matmul.allow_tf32 = True
        euler = diffusers.EulerAncestralDiscreteScheduler.from_pretrained(
            local_path, subfolder="scheduler"
        )
        self.pipe = diffusers.StableDiffusionPipeline.from_pretrained(
            local_path, scheduler=euler
        ).to("cuda")
        self.pipe.enable_xformers_memory_efficient_attention()

    @stub.function(
        gpu=modal.gpu.A100(),
        # Mount the locally downloaded weights into the container at the same path.
        mounts=[modal.Mount(local_dir=local_path, remote_dir=local_path)],
    )
    def run_inference(self, prompt: str, steps: int = 20) -> bytes:
        import torch

        with torch.inference_mode():
            image = self.pipe(
                prompt, num_inference_steps=steps, guidance_scale=7.0
            ).images[0]

        # Convert to PNG bytes.
        buf = io.BytesIO()
        image.save(buf, format="PNG")
        return buf.getvalue()


def main():
    samples = 10
    prompt = "An 1600s oil painting of the New York City skyline"
    output_dir = Path("/tmp/stable-diffusion")
    output_dir.mkdir(parents=True, exist_ok=True)
    with stub.run():
        sd = StableDiffusion()
        for i in range(samples):
            t0 = time.time()
            image_bytes = sd.run_inference.call(prompt)
            output_path = output_dir / f"output_{i}.png"
            print(f"Sample {i} took {time.time() - t0:.3f}s. Saving it to {output_path}")
            with open(output_path, "wb") as f:
                f.write(image_bytes)


if __name__ == "__main__":
    download_model()
    main()
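
# Usage sketch, not verified end-to-end: this assumes a Modal account already
# set up locally (e.g. via `modal token new`), a HUGGINGFACE_TOKEN environment
# variable with access to runwayml/stable-diffusion-v1-5, and that the script
# is saved as local_model_mount.py (that filename is hypothetical):
#
#   export HUGGINGFACE_TOKEN=hf_...
#   python local_model_mount.py
#
# download_model() runs on the local machine and saves the weights under
# /tmp/hf-model; main() then starts the Modal app, which mounts that directory
# into the A100 container, and writes the ten returned PNGs to
# /tmp/stable-diffusion locally.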