import io
import os
import time
from pathlib import Path

import diffusers
import modal
import torch

stub = modal.Stub("local-model-mount-test")

model_id = "runwayml/stable-diffusion-v1-5"
hf_token = os.getenv("HUGGINGFACE_TOKEN")
local_path = "/tmp/hf-model"

# Conda-based container image with CUDA-enabled PyTorch, xformers, and diffusers.
image = (
    modal.Image.conda()
    .run_commands(
        [
            "conda install xformers -c xformers/label/dev",
            "conda install pytorch torchvision pytorch-cuda=11.7 -c pytorch -c nvidia",
        ]
    )
    .run_commands(["pip install diffusers[torch] transformers ftfy accelerate"])
)
stub.image = image


def download_model():
    # Runs locally: fetch the scheduler and pipeline from the Hugging Face Hub
    # and save them under local_path so they can be mounted into the container.
    euler = diffusers.EulerAncestralDiscreteScheduler.from_pretrained(
        model_id, subfolder="scheduler", use_auth_token=hf_token, cache_dir=local_path
    )
    euler.save_pretrained(local_path)
    pipe = diffusers.StableDiffusionPipeline.from_pretrained(
        model_id,
        use_auth_token=hf_token,
        revision="fp16",
        torch_dtype=torch.float16,
        cache_dir=local_path,
    )
    # save_pretrained() writes each pipeline component (unet, vae, scheduler, ...)
    # into its own subfolder under local_path.
    pipe.save_pretrained(local_path)


class StableDiffusion:
    def __enter__(self):
        # Runs once per container start, so the pipeline is loaded a single
        # time and then reused across calls to run_inference.
        import diffusers
        import torch

        torch.backends.cudnn.benchmark = True
        torch.backends.cuda.matmul.allow_tf32 = True
        euler = diffusers.EulerAncestralDiscreteScheduler.from_pretrained(
            local_path, subfolder="scheduler"
        )
        self.pipe = diffusers.StableDiffusionPipeline.from_pretrained(
            local_path, scheduler=euler
        ).to("cuda")
        self.pipe.enable_xformers_memory_efficient_attention()

    @stub.function(
        gpu=modal.gpu.A100(),
        # Mount the locally downloaded weights into the container at the same path.
        mounts=[modal.Mount(local_dir=local_path, remote_dir=local_path)],
    )
    def run_inference(self, prompt: str, steps: int = 20) -> bytes:
        import torch

        with torch.inference_mode():
            image = self.pipe(
                prompt, num_inference_steps=steps, guidance_scale=7.0
            ).images[0]

        # Convert to PNG bytes.
        buf = io.BytesIO()
        image.save(buf, format="PNG")
        return buf.getvalue()


def main():
    samples = 10
    prompt = "An 1600s oil painting of the New York City skyline"
    output_dir = Path("/tmp/stable-diffusion")
    output_dir.mkdir(parents=True, exist_ok=True)
    with stub.run():
        sd = StableDiffusion()
        for i in range(samples):
            t0 = time.time()
            image_bytes = sd.run_inference.call(prompt)
            output_path = output_dir / f"output_{i}.png"
            print(f"Sample {i} took {time.time() - t0:.3f}s. Saving it to {output_path}")
            with open(output_path, "wb") as f:
                f.write(image_bytes)


if __name__ == "__main__":
    download_model()
    main()
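
# Usage sketch, not verified end-to-end: this assumes a Modal account already
# set up locally (e.g. via `modal token new`), a HUGGINGFACE_TOKEN environment
# variable with access to runwayml/stable-diffusion-v1-5, and that the script
# is saved as local_model_mount.py (that filename is hypothetical):
#
#   export HUGGINGFACE_TOKEN=hf_...
#   python local_model_mount.py
#
# download_model() runs on the local machine and saves the weights under
# /tmp/hf-model; main() then starts the Modal app, which mounts that directory
# into the A100 container, and writes the ten returned PNGs to
# /tmp/stable-diffusion locally.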