adapted to latest diffusers
rlsn committed Mar 21, 2024
1 parent 62ef61f commit 28757a7
Showing 8 changed files with 135 additions and 51 deletions.
24 changes: 24 additions & 0 deletions README.md
@@ -0,0 +1,24 @@
# Tripper
A wrapper that generates consecutive sequences of images with SD1.5. It's quite a trip to watch the images evolve. Future work may add SDXL support. Inspired by [deforum](https://deforum.art/); check it out.
![](preview/sample1.gif)
![](preview/sample2.gif)
# Features
- supports loading LoRA models (safetensors)
- supports prompts longer than 75 tokens, plus negative prompts
- frame zoom in/out
# Usage
Edit the `config.json` to configure the settings:
- `model_path`: path to your SD model (safetensors)
- `generate_video`: set to false to generate a batch of candidate images to pick an initial image from, then set to true to switch to video mode
- `nsteps`: total number of images (frames) to generate
- `scheduler`: one of "euler", "euler a", "DDIM", "DDPM", "DPM++ 2M SDE Karras", "DPM++ 2M Karras"
- `num_inference_steps`: number of denoising steps per image
- `lora_dict`: dictionary of the form {"[path_to_lora_safetensor]": [weight]}
- `zoom`: zoom in (<1) or out (>1); 0.98~1.02 is a sensible range (1 means no zooming)
- `strength`: denoising strength (0~1); higher values make each image depart more from the previous one
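For example, an abridged `config.json` might look like this (illustrative values; see the repository's `config.json` for the full set of keys, including `prompt` and `negative_prompt`):
```
{
    "model_path": "your_model.safetensors",
    "scheduler": "euler",
    "generate_video": false,
    "nsteps": 80,
    "num_inference_steps": 40,
    "zoom": 0.98,
    "strength": 0.65,
    "lora_dict": {"your_lora.safetensors": 0.5}
}
```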
then run with
```
python run.py
```

Have fun
18 changes: 18 additions & 0 deletions attrdict.py
@@ -0,0 +1,18 @@
"""
An attribute dictionary that's very handy everywhere
"""
import json
class AttrDict(dict):
def __init__(self, *args, **kwargs):
super(AttrDict, self).__init__(*args, **kwargs)
self.__dict__ = self

def to_json(self, filename):
with open(filename, "w") as wf:
wf.write(json.dumps(self, indent=4))

@classmethod
def from_json(cls, filename):
with open(filename, "r") as f:
obj = cls(json.loads(f.read()))
return obj
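A quick usage sketch of `AttrDict` (the file name is hypothetical):
```
from attrdict import AttrDict

cfg = AttrDict({"nsteps": 80, "zoom": 0.98})
print(cfg.nsteps)       # keys double as attributes -> 80
print(cfg["zoom"])      # plain dict access still works -> 0.98
cfg.to_json("cfg.json")                 # write to disk
cfg2 = AttrDict.from_json("cfg.json")   # round-trip back
```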
22 changes: 22 additions & 0 deletions config.json
@@ -0,0 +1,22 @@
{
"model_path": "your_model.safetensors",
"scheduler": "DPM++ 2M SDE Karras",
"generate_video": false,
"init_image" : "preview/your_init_image.jpg",
"prompt": "masterpiece, best quality, ultra-detailed, illustration, 1girl, solo, outdoors, camping, night, mountains, nature, stars, moon, tent, twin ponytails, green eyes, cheerful, happy, backpack, sleeping bag, camping stove, water bottle, mountain boots, gloves, sweater, hat, flashlight, forest, rocks, river, wood, smoke, shadows, contrast, clear sky, constellations, Milky Way, peaceful, serene, quiet, tranquil, remote, secluded, adventurous, exploration, escape, independence, survival, resourcefulness, challenge, perseverance, stamina, endurance, observation, intuition, adaptability, creativity, imagination, artistry, inspiration, beauty, awe, wonder, gratitude, appreciation, relaxation, enjoyment, rejuvenation, mindfulness, awareness, connection, harmony, balance, texture, detail, realism, depth, perspective, composition, color, light, shadow, reflection, refraction, tone, contrast, foreground, middle ground, background, naturalistic, figurative, representational, impressionistic, expressionistic, abstract, innovative, experimental, unique",
"negative_prompt": "(worst quality:2), (low quality:2), (normal quality:2), lowres, normal quality, ((monochrome)), ((grayscale)), bad anatomy,ng_deepnegative_v1_75t,easynegative, badhandv4, text, watermark,",
"strength":0.65,
"num_img":6,
"guidance_scale":6.5,
"num_inference_steps":40,
"nsteps":80,
"width":768,
"height":512,
"zoom":0.98,
"out_dir":"preview",
"fps":10,
"lora_dict":{
"your_lora_1.safetensors":0.5,
"your_lora_2.safetensors":0.8
}
}
Binary file added preview/sample1.gif
Binary file added preview/sample2.gif
34 changes: 34 additions & 0 deletions run.py
@@ -0,0 +1,34 @@
#
# Created on Mon Jul 25 2023
#
# Copyright (c) 2023 rlsn
#
# !pip install diffusers transformers safetensors einops scipy

from tripper import Tripper, schedulers
import diffusers
import argparse
from attrdict import AttrDict
from utils import const_schedule, zoom, export_as_gif, timestr
import PIL

if __name__=="__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--config_file', type=str, help="filename of the running configuration", default="config.json")

args = parser.parse_args()
config = AttrDict.from_json(args.config_file)
print(f"running with config:{config}")

tripper = Tripper(config.model_path)
tripper.set_scheduler(schedulers[config.scheduler])
if config.generate_video:
config.init_image = PIL.Image.open(config.init_image)

# strength schedule
config.strength_schedule = const_schedule(config.strength,config.nsteps)
config.transform_fn = lambda img,s: zoom(img, config.zoom)
imgs = tripper.generate_video(**config)
export_as_gif(f"{config.out_dir}/{timestr()}.gif", imgs, frames_per_second=config.fps)
else:
tripper.txt2img(**config)
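Invocation sketch (the `--config_file` flag defaults to `config.json`; the file name below is hypothetical):
```
python run.py --config_file my_trip.json
```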
71 changes: 24 additions & 47 deletions tripper.py
@@ -1,15 +1,29 @@
#
# Created on Mon Jul 25 2023
# a stable diffusion pipeline to generate transforming images based on a text description
# Copyright (c) 2023 rlsn
#
import diffusers
from diffusers import (StableDiffusionPipeline, StableDiffusionImg2ImgPipeline)
import torch
- from utils import *
+ from utils import load_lora_weights, convert_prompt_embeds, clean_prompt, timestr
import os,json

schedulers = {
"euler": diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler,
"euler a":diffusers.schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteScheduler,
"DDIM":diffusers.schedulers.scheduling_ddim.DDIMScheduler,
"DDPM":diffusers.schedulers.scheduling_ddpm.DDPMScheduler,
"DPM++ 2M SDE Karras":diffusers.schedulers.scheduling_k_dpm_2_discrete.KDPM2DiscreteScheduler,
"DPM++ 2M Karras":diffusers.schedulers.scheduling_k_dpm_2_ancestral_discrete.KDPM2AncestralDiscreteScheduler,
}
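# naming note: the two "DPM++" entries map to diffusers' KDPM2 schedulers, which
# correspond to k-diffusion's "DPM2 Karras" / "DPM2 a Karras" samplers; diffusers'
# usual match for DPM++ 2M Karras is DPMSolverMultistepScheduler(use_karras_sigmas=True)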

class Tripper(object):
def __init__(self, model_file):
- txt2img_pipe = StableDiffusionPipeline.from_ckpt(model_file, torch_dtype=torch.float16)
- txt2img_pipe.safety_checker = lambda images,**kwargs: (images, [False] * len(images))
+ txt2img_pipe = StableDiffusionPipeline.from_single_file(model_file, torch_dtype=torch.float16)
img2img_pipe = StableDiffusionImg2ImgPipeline(**txt2img_pipe.components)
- img2img_pipe.safety_checker = lambda images,**kwags: (images, [False] * len(images))
+ txt2img_pipe.safety_checker = None
+ img2img_pipe.safety_checker = None

self.txt2img_pipe = txt2img_pipe.to('cuda')
self.img2img_pipe = img2img_pipe.to("cuda")
@@ -42,7 +56,7 @@ def unload_lora(self, lora_dict):

def txt2img(self, prompt, negative_prompt, lora_dict,
width=512, height=768, num_img=6, guidance_scale=7, num_inference_steps=25,
out_dir="preview"):
out_dir="preview", **kargs):
os.makedirs(out_dir, exist_ok = True)

self.load_lora(lora_dict)
@@ -59,13 +73,13 @@ def txt2img(self, prompt, negative_prompt, lora_dict,
for i,img in enumerate(images):
fn = f"{out_dir}/{timestr()}_{i}.jpg"
img.convert("RGB").save(fn)
- self.unload_lora(lora_dict)
+ # self.unload_lora(lora_dict)

return images

def img2img(self, image, prompt, negative_prompt, lora_dict, strength=0.5,
num_img=6, guidance_scale=7, num_inference_steps=25,
out_dir="preview"):
out_dir="preview", **kargs):
os.makedirs(out_dir, exist_ok = True)

self.load_lora(lora_dict)
@@ -91,7 +105,7 @@ def generate_video(self, init_image, prompt, negative_prompt,
transform_fn,
guidance_scale=7,
num_inference_steps=40,
out_dir="preview"):
out_dir="preview", **kargs):

os.makedirs(out_dir, exist_ok = True)

@@ -123,42 +137,5 @@ def generate_video(self, init_image, prompt, negative_prompt,
fn = out_dir+"/%06d.jpg"%s
images[-1].convert("RGB").save(fn)

- self.unload_lora(lora_dict)
- return images
-
-
- # def batch_generate(self, general_prompt, character_dict, addition_list, lora_dict,
- # negative_prompt, img_per_comb=6, save_dir=".", guidance_scale=7, num_inference_steps=25):
- # for character in character_dict:
- # try:
- # pipeline = load_lora_weights(pipeline, character, 1., 'cuda', torch.float32, load=True)
- # print(f"loaded {character}")
- # except:
- # continue
- # for lora in lora_dict:
- # try:
- # pipeline = load_lora_weights(pipeline, lora, 1., 'cuda', torch.float32, load=True)
- # print(f"loaded {lora}")
- # except:
- # continue
- # for addition in addition_list:
- # width = lora_dict[lora][1]
- # height = lora_dict[lora][2]
- # prompt = general_prompt + lora_dict[lora][0] + addition + character_dict[character]
- # prompt = clean_prompt(prompt)
- # prompt_embeds, negative_prompt_embeds = convert_prompt_embeds(pipeline, prompt, negative_prompt)
- # images = txt2img_pipe(prompt_embeds=prompt_embeds,
- # negative_prompt_embeds=negative_prompt_embeds,
- # guidance_scale=guidance_scale,
- # num_images_per_prompt=img_per_comb,
- # num_inference_steps=num_inference_steps,
- # height=height, width=width,
- # ).images
- # for img in images:
- # fn = f"{save_dir}/{character.split('.')[0]}_{lora.split('.')[0]}_{int(np.random.rand()*1e6)}.jpg"
- # img.convert("RGB").save(fn)
- # print(f"saved {fn}")
- # pipeline = load_lora_weights(pipeline, lora, 1., 'cuda', torch.float32, load=False)
- # print(f"unloaded {lora}")
- # pipeline = load_lora_weights(pipeline, character, 1., 'cuda', torch.float32, load=False)
- # print(f"unloaded {character}")
+ # self.unload_lora(lora_dict)
+ return images
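Given the class above, a minimal driving sketch (model path, prompts, and LoRA dict are placeholders; requires a CUDA device):
```
from tripper import Tripper, schedulers

t = Tripper("your_model.safetensors")   # loads SD1.5 weights onto the GPU
t.set_scheduler(schedulers["euler"])    # any key from the schedulers dict above
imgs = t.txt2img(prompt="a quiet forest at dusk",
                 negative_prompt="lowres, watermark",
                 lora_dict={}, num_img=2, out_dir="preview")
```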
17 changes: 13 additions & 4 deletions utils.py
@@ -1,3 +1,8 @@
+ #
+ # Created on Mon Jul 25 2023
+ #
+ # 2023 rlsn
+ #
from collections import defaultdict
from einops import einsum
import torch
@@ -151,10 +156,14 @@ def zoom(im,ratio):
m = (s[0]-w)/2,(s[1]-h)/2
nim = im.crop((m[0], m[1], s[0]-m[0], s[1]-m[1]))
return nim.resize(s)
- else:
- # todo
- return im

+ elif ratio>1:
+ s = im.size
+ r=ratio-1
+ m = int((s[0]*r)//2), int((s[1]*r)//2)
+ im = np.array(im)
+ nim = np.pad(im, ((m[1], m[1]), (m[0], m[0]), (0, 0)), mode='symmetric')
+ nim = Image.fromarray(nim)
+ return nim.resize(s)

def impulse_schedule(floor,ceiling,impulse,width,steps):
x = np.arange(steps)
