diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0167381
--- /dev/null
+++ b/README.md
@@ -0,0 +1,24 @@
+# Tripper
+A wrapper that generates a consecutive sequence of images with SD1.5. It's quite a trip to watch the images evolve. Future work may add SDXL support. Inspired by [deforum](https://deforum.art/); check it out.
+![](preview/sample1.gif)
+![](preview/sample2.gif)
+# Features
+- supports loading LoRA models (safetensors)
+- supports prompts exceeding the 75-token limit, plus negative prompts
+- frame zoom in/out
+# Usage
+Edit `config.json` to configure the settings:
+- `model_path`: path to your SD model safetensors
+- `generate_video`: set to false to generate a batch of candidate initial images to choose from, then set to true to switch to video mode
+- `nsteps`: total number of images to generate
+- `scheduler`: choose one of "euler", "euler a", "DDIM", "DDPM", "DPM++ 2M SDE Karras", "DPM++ 2M Karras"
+- `num_inference_steps`: number of denoising steps per image
+- `lora_dict`: dictionary mapping each LoRA safetensors path to its weight, i.e. {"[path_to_lora_safetensor]": [weight]}
+- `zoom`: zoom in (<1) or out (>1); 0.98~1.02 is a sensible range (1 means no zooming)
+- `strength`: roughly how much each image differs from the previous one (0~1, higher means more change)
+Then run with
+```
+python run.py
+```
+
+Have fun
\ No newline at end of file
diff --git a/attrdict.py b/attrdict.py
new file mode 100644
index 0000000..2aaf8e2
--- /dev/null
+++ b/attrdict.py
@@ -0,0 +1,18 @@
+"""
+An attribute dictionary: a dict whose keys are also accessible as attributes
+"""
+import json
+class AttrDict(dict):
+    def __init__(self, *args, **kwargs):
+        super(AttrDict, self).__init__(*args, **kwargs)
+        self.__dict__ = self
+
+    def to_json(self, filename):
+        with open(filename, "w") as wf:
+            wf.write(json.dumps(self, indent=4))
+
+    @classmethod
+    def from_json(cls, filename):
+        with open(filename, "r") as f:
+            obj = cls(json.loads(f.read()))
+        return obj
\ No newline at end of file
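For orientation, a minimal usage sketch of `AttrDict` (this mirrors how `run.py` consumes `config.json`; the backup filename below is purely illustrative):

```python
from attrdict import AttrDict

# top-level JSON keys become both dict keys and attributes
config = AttrDict.from_json("config.json")
print(config.model_path)              # same as config["model_path"]

# since __dict__ is the dict itself, attribute writes update the dict
config.strength = 0.7
config.to_json("config_backup.json")  # illustrative filename
```

Only the top level is converted: nested values such as `lora_dict` remain plain dicts.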
"nsteps":80, + "width":768, + "height":512, + "zoom":0.98, + "out_dir":"preview", + "fps":10, + "lora_dict":{ + "your_lora_1.safetensors":0.5, + "your_lora_2.safetensors":0.8 + } +} \ No newline at end of file diff --git a/preview/sample1.gif b/preview/sample1.gif new file mode 100644 index 0000000..bed212b Binary files /dev/null and b/preview/sample1.gif differ diff --git a/preview/sample2.gif b/preview/sample2.gif new file mode 100644 index 0000000..815ecb5 Binary files /dev/null and b/preview/sample2.gif differ diff --git a/run.py b/run.py new file mode 100644 index 0000000..527a5f9 --- /dev/null +++ b/run.py @@ -0,0 +1,34 @@ +# +# Created on Mon Jul 25 2023 +# +# Copyright (c) 2023 rlsn +# +# !pip install diffusers transformers safetensors einops scipy + +from tripper import Tripper, schedulers +import diffusers +import argparse +from attrdict import AttrDict +from utils import const_schedule, zoom, export_as_gif, timestr +import PIL + +if __name__=="__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--config_file', type=str, help="filename of the running configuration", default="config.json") + + args = parser.parse_args() + config = AttrDict.from_json(args.config_file) + print(f"running with config:{config}") + + tripper = Tripper(config.model_path) + tripper.set_scheduler(schedulers[config.scheduler]) + if config.generate_video: + config.init_image = PIL.Image.open(config.init_image) + + # strength schedule + config.strength_schedule = const_schedule(config.strength,config.nsteps) + config.transform_fn = lambda img,s: zoom(img, config.zoom) + imgs = tripper.generate_video(**config) + export_as_gif(f"{config.out_dir}/{timestr()}.gif", imgs, frames_per_second=config.fps) + else: + tripper.txt2img(**config) \ No newline at end of file diff --git a/tripper.py b/tripper.py index 9a3fccf..49bb667 100644 --- a/tripper.py +++ b/tripper.py @@ -1,15 +1,29 @@ +# +# Created on Mon Jul 25 2023 +# a stable diffusion pipline to generate transforming images based on text description +# Copyright (c) 2023 rlsn +# import diffusers from diffusers import (StableDiffusionPipeline, StableDiffusionImg2ImgPipeline) import torch -from utils import * +from utils import load_lora_weights, convert_prompt_embeds, clean_prompt, timestr import os,json +schedulers = { + "euler": diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler, + "euler a":diffusers.schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteScheduler, + "DDIM":diffusers.schedulers.scheduling_ddim.DDIMScheduler, + "DDPM":diffusers.schedulers.scheduling_ddpm.DDPMScheduler, + "DPM++ 2M SDE Karras":diffusers.schedulers.scheduling_k_dpm_2_discrete.KDPM2DiscreteScheduler, + "DPM++ 2M Karras":diffusers.schedulers.scheduling_k_dpm_2_ancestral_discrete.KDPM2AncestralDiscreteScheduler, +} + class Tripper(object): def __init__(self, model_file): - txt2img_pipe = StableDiffusionPipeline.from_ckpt(model_file, torch_dtype=torch.float16) - txt2img_pipe.safety_checker = lambda images,**kwargs: (images, [False] * len(images)) + txt2img_pipe = StableDiffusionPipeline.from_single_file(model_file, torch_dtype=torch.float16) img2img_pipe = StableDiffusionImg2ImgPipeline(**txt2img_pipe.components) - img2img_pipe.safety_checker = lambda images,**kwags: (images, [False] * len(images)) + txt2img_pipe.safety_checker = None + img2img_pipe.safety_checker = None self.txt2img_pipe = txt2img_pipe.to('cuda') self.img2img_pipe = img2img_pipe.to("cuda") @@ -42,7 +56,7 @@ def unload_lora(self, lora_dict): def txt2img(self, prompt, 
diff --git a/tripper.py b/tripper.py
index 9a3fccf..49bb667 100644
--- a/tripper.py
+++ b/tripper.py
@@ -1,15 +1,31 @@
+#
+# Created on Mon Jul 25 2023
+# a stable diffusion pipeline to generate transforming images based on text descriptions
+# Copyright (c) 2023 rlsn
+#
 import diffusers
 from diffusers import (StableDiffusionPipeline,
                        StableDiffusionImg2ImgPipeline)
 import torch
-from utils import *
+from utils import load_lora_weights, convert_prompt_embeds, clean_prompt, timestr
 import os,json
+# note: the two "DPM++ 2M" names below instantiate k-DPM2 schedulers;
+# diffusers' DPMSolverMultistepScheduler is the usual "DPM++ 2M" implementation
+schedulers = {
+    "euler": diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler,
+    "euler a":diffusers.schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteScheduler,
+    "DDIM":diffusers.schedulers.scheduling_ddim.DDIMScheduler,
+    "DDPM":diffusers.schedulers.scheduling_ddpm.DDPMScheduler,
+    "DPM++ 2M SDE Karras":diffusers.schedulers.scheduling_k_dpm_2_discrete.KDPM2DiscreteScheduler,
+    "DPM++ 2M Karras":diffusers.schedulers.scheduling_k_dpm_2_ancestral_discrete.KDPM2AncestralDiscreteScheduler,
+}
+
 class Tripper(object):
     def __init__(self, model_file):
-        txt2img_pipe = StableDiffusionPipeline.from_ckpt(model_file, torch_dtype=torch.float16)
-        txt2img_pipe.safety_checker = lambda images,**kwargs: (images, [False] * len(images))
+        txt2img_pipe = StableDiffusionPipeline.from_single_file(model_file, torch_dtype=torch.float16)
         img2img_pipe = StableDiffusionImg2ImgPipeline(**txt2img_pipe.components)
-        img2img_pipe.safety_checker = lambda images,**kwags: (images, [False] * len(images))
+        txt2img_pipe.safety_checker = None
+        img2img_pipe.safety_checker = None
 
         self.txt2img_pipe = txt2img_pipe.to('cuda')
         self.img2img_pipe = img2img_pipe.to("cuda")
@@ -42,7 +56,7 @@ def unload_lora(self, lora_dict):
     def txt2img(self, prompt, negative_prompt, lora_dict,
                 width=512, height=768, num_img=6,
                 guidance_scale=7, num_inference_steps=25,
-                out_dir="preview"):
+                out_dir="preview", **kargs):
         os.makedirs(out_dir, exist_ok = True)
 
         self.load_lora(lora_dict)
@@ -59,13 +73,13 @@ def txt2img(self, prompt, negative_prompt, lora_dict,
         for i,img in enumerate(images):
             fn = f"{out_dir}/{timestr()}_{i}.jpg"
             img.convert("RGB").save(fn)
-        self.unload_lora(lora_dict)
+        # self.unload_lora(lora_dict)
         return images
 
     def img2img(self, image, prompt, negative_prompt, lora_dict,
                 strength=0.5, num_img=6,
                 guidance_scale=7, num_inference_steps=25,
-                out_dir="preview"):
+                out_dir="preview", **kargs):
         os.makedirs(out_dir, exist_ok = True)
 
         self.load_lora(lora_dict)
@@ -91,7 +105,7 @@ def generate_video(self, init_image, prompt, negative_prompt, transform_fn,
                        guidance_scale=7, num_inference_steps=40,
-                       out_dir="preview"):
+                       out_dir="preview", **kargs):
 
         os.makedirs(out_dir, exist_ok = True)
 
@@ -123,42 +137,5 @@
             fn = out_dir+"/%06d.jpg"%s
             images[-1].convert("RGB").save(fn)
 
-        self.unload_lora(lora_dict)
-        return images
-
-
-    # def batch_generate(self, general_prompt, character_dict, addition_list, lora_dict,
-    #                    negative_prompt, img_per_comb=6, save_dir=".", guidance_scale=7, num_inference_steps=25):
-    #     for character in character_dict:
-    #         try:
-    #             pipeline = load_lora_weights(pipeline, character, 1., 'cuda', torch.float32, load=True)
-    #             print(f"loaded {character}")
-    #         except:
-    #             continue
-    #         for lora in lora_dict:
-    #             try:
-    #                 pipeline = load_lora_weights(pipeline, lora, 1., 'cuda', torch.float32, load=True)
-    #                 print(f"loaded {lora}")
-    #             except:
-    #                 continue
-    #             for addition in addition_list:
-    #                 width = lora_dict[lora][1]
-    #                 height = lora_dict[lora][2]
-    #                 prompt = general_prompt + lora_dict[lora][0] + addition + character_dict[character]
-    #                 prompt = clean_prompt(prompt)
-    #                 prompt_embeds, negative_prompt_embeds = convert_prompt_embeds(pipeline, prompt, negative_prompt)
-    #                 images = txt2img_pipe(prompt_embeds=prompt_embeds,
-    #                                       negative_prompt_embeds=negative_prompt_embeds,
-    #                                       guidance_scale=guidance_scale,
-    #                                       num_images_per_prompt=img_per_comb,
-    #                                       num_inference_steps=num_inference_steps,
-    #                                       height=height, width=width,
-    #                                       ).images
-    #                 for img in images:
-    #                     fn = f"{save_dir}/{character.split('.')[0]}_{lora.split('.')[0]}_{int(np.random.rand()*1e6)}.jpg"
-    #                     img.convert("RGB").save(fn)
-    #                     print(f"saved {fn}")
-    #             pipeline = load_lora_weights(pipeline, lora, 1., 'cuda', torch.float32, load=False)
-    #             print(f"unloaded {lora}")
-    #         pipeline = load_lora_weights(pipeline, character, 1., 'cuda', torch.float32, load=False)
-    #         print(f"unloaded {character}")
\ No newline at end of file
+        # self.unload_lora(lora_dict)
+        return images
\ No newline at end of file
diff --git a/utils.py b/utils.py
index 456a89e..7fc8ebd 100644
--- a/utils.py
+++ b/utils.py
@@ -1,3 +1,8 @@
+#
+# Created on Mon Jul 25 2023
+#
+# 2023 rlsn
+#
 from collections import defaultdict
 from einops import einsum
 import torch
@@ -151,10 +156,17 @@ def zoom(im,ratio):
         m = (s[0]-w)/2,(s[1]-h)/2
         nim = im.crop((m[0], m[1], s[0]-m[0], s[1]-m[1]))
         return nim.resize(s)
-    else:
-        # todo
-        return im
-
+    elif ratio>1:
+        s = im.size
+        r=ratio-1
+        m = int((s[0]*r)//2), int((s[1]*r)//2)
+        im = np.array(im)
+        nim = np.pad(im, ((m[1], m[1]), (m[0], m[0]), (0, 0)), mode='symmetric')
+        nim = Image.fromarray(nim)
+        return nim.resize(s)
+    else:
+        # ratio == 1: no zooming, return the image unchanged
+        return im

 def impulse_schedule(floor,ceiling,impulse,width,steps):
     x = np.arange(steps)
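To make the two `zoom` branches concrete, a small sketch of the transform's behavior (the image path is the placeholder from `config.json`; any RGB image works):

```python
from PIL import Image
from utils import zoom

im = Image.open("preview/your_init_image.jpg")  # placeholder path from config.json
zin = zoom(im, 0.98)   # ratio < 1: crop a centered window, resize back up -> zoom in
zout = zoom(im, 1.02)  # ratio > 1: pad borders symmetrically, resize back down -> zoom out
assert zin.size == im.size == zout.size         # frame size is preserved
```

Keeping the output size equal to the input is what lets `transform_fn` apply the same ratio every frame without the video drifting in resolution.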