from PIL import Image
import stable_inference
import numpy as np
from einops import repeat

# Interpolate a video between two conditionings.
FOLDER = 'test/painting'
MAX_STRENGTH = 0.5  # Denoising strength at the midpoint of the interpolation (its maximum)
SEED = 9001
SECONDS = 10
FRAMES_PER_SEC = 30
STEPS_IN_OUT = int((SECONDS * FRAMES_PER_SEC) / 2)

# Get these from https://huggingface.co/spaces/pharma/CLIP-Interrogator
prompt1 = 'a painting of a woman with a monkey on her shoulder, inspired by Kahlo, behance contest winner, large bushy eyebrows, resembling a mix of grimes, sharp black eyebrows, in a highly detailed jungle, woman with braided brown hair, raised eyebrows'
prompt2 = 'a painting of a girl with a pearl earring, by Vermeer, pixabay contest winner, academic art, long flowing cape and cowl, lorem ipsum dolor sit amet, she looks like a mix of grimes, twitter pfp, corel paintshop pro, triadic colours, an oversized beret'

# Images must be the same size.
START_IMAGE_FILE = 'frida.png'
END_IMAGE_FILE = 'earring.png'

engine = stable_inference.StableDiffusionInference(
    checkpoint_loc='./sd-v1-5-inpainting.ckpt',
)

# Load both images and move them to the GPU as half-precision tensors.
start_pil = Image.open(START_IMAGE_FILE)
start_tensor, (_, _) = stable_inference.util.load_img(img=start_pil)
start_tensor = start_tensor.half().to('cuda')

end_pil = Image.open(END_IMAGE_FILE)
end_tensor, (_, _) = stable_inference.util.load_img(img=end_pil)
end_tensor = end_tensor.half().to('cuda')

# Encode both images into the model's latent space.
start = engine.model.get_first_stage_encoding(
    engine.model.encode_first_stage(start_tensor))
end = engine.model.get_first_stage_encoding(
    engine.model.encode_first_stage(end_tensor))

# Compute the text conditioning for both prompts.
(
    conditioning_start,
    unconditioning,  # Reuse this as it's the same for both
    weighted_subprompts_start,
    _,  # Don't need the individual embedding managers
) = engine.compute_conditioning_and_weights(prompt1, 1)
(
    conditioning_end,
    _,
    weighted_subprompts_end,
    _,  # Don't need the individual embedding managers
) = engine.compute_conditioning_and_weights(prompt2, 1)

weighted_subprompts = stable_inference.util.combine_weighted_subprompts(
    0.5, weighted_subprompts_start, weighted_subprompts_end)

# First half: slerp from the start image toward the midpoint while ramping
# the denoising strength up from ~0 to MAX_STRENGTH. The square root on the
# linspace front-loads the ramp so motion eases in quickly.
strength_schedule = []
for itr, i in enumerate(np.linspace(0., 1., STEPS_IN_OUT)**(1/2)):
    print('step', itr, i)
    prob = i
    c = stable_inference.util.slerp(
        prob / 2,
        conditioning_start,
        conditioning_end,
    )
    slerped = stable_inference.util.slerp(prob / 2, start, end)
    strength_schedule.append(MAX_STRENGTH * prob + 0.01)
    _, extra_data = engine.sample(
        '',
        1,
        'dpmpp_2m',
        SEED,
        50,
        conditioning=c,
        init_latent=repeat(slerped, '1 ... -> b ...', b=1),
        scale=7.5,
        strength=MAX_STRENGTH * prob + 0.01,
        unconditioning=unconditioning,
        weighted_subprompts=weighted_subprompts,
    )
    extra_data['images'][0].save(f'{FOLDER}/{itr}.png')

# Second half: slerp from the midpoint to the end image, replaying the
# strength schedule in reverse so it ramps back down to ~0.
strength_schedule.reverse()
for itr, i in enumerate(np.flip(np.linspace(0., 1., STEPS_IN_OUT)**(1/2))):
    print('step', itr + STEPS_IN_OUT, i)
    prob = i
    c = stable_inference.util.slerp(
        1 - (prob / 2),
        conditioning_start,
        conditioning_end,
    )
    slerped = stable_inference.util.slerp(0.5 + (1 - prob) / 2, start, end)
    _, extra_data = engine.sample(
        '',
        1,
        'dpmpp_2m',
        SEED,
        50,
        conditioning=c,
        init_latent=repeat(slerped, '1 ... -> b ...', b=1),
        scale=7.5,
        strength=strength_schedule[itr],
        unconditioning=unconditioning,
        weighted_subprompts=weighted_subprompts,
    )
    extra_data['images'][0].save(f'{FOLDER}/{itr + STEPS_IN_OUT}.png')

# Make a video @ 30 fps, 512x512 image size:
# ffmpeg -r 30 -f image2 -s 512x512 -i test/painting/%d.png -vcodec libx264 -crf 25 -pix_fmt yuv420p test.mp4
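
# For readers without the library: `stable_inference.util.slerp` performs
# spherical linear interpolation, which keeps interpolated latents and
# conditionings on the hypersphere rather than cutting through it as lerp
# would. The library's actual implementation may differ; `slerp_sketch`
# below is a minimal, hypothetical sketch of the standard formulation and
# is NOT the library's API.
#
# import torch
#
# def slerp_sketch(t, v0, v1, dot_threshold=0.9995):
#     """Spherically interpolate between tensors v0 and v1 by fraction t."""
#     # Cosine of the angle between the two tensors, treated as flat vectors.
#     dot = torch.sum(v0 * v1) / (torch.norm(v0) * torch.norm(v1))
#     if torch.abs(dot) > dot_threshold:
#         # Nearly parallel: fall back to plain linear interpolation.
#         return (1 - t) * v0 + t * v1
#     theta = torch.acos(dot)      # Angle between the vectors
#     sin_theta = torch.sin(theta)
#     return (torch.sin((1 - t) * theta) / sin_theta) * v0 + \
#            (torch.sin(t * theta) / sin_theta) * v1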