added cadence

rlsn · Mar 22, 2024 · 391deea · 391deea
1 parent 33e8c7a
commit 391deea
Show file tree

Hide file tree

Showing 7 changed files with 41 additions and 14 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,3 @@
+
+/preview
+/__pycache__
diff --git a/README.md b/README.md
@@ -1,7 +1,6 @@
 # Tripper
 A pipeline that generates a consecutive sequence of images with SD1.5. It's quite a trip to watch the images evolve. Future work may enable SDXL. Inspired by [deforum](https://deforum.art/) — check it out.
-![](preview/sample1.gif)
-![](preview/sample2.gif)
+![](preview/sample1.gif) ![](preview/sample2.gif)
 # Features
 - support loading lora models (safetensors)
 - support prompt token size exceeding 75, plus negative prompt
@@ -10,12 +9,13 @@ A pipeline that generate consecutive sequence of images with SD1.5. It's quite a
 Edit the `config.json` to configure the settings:
 - `model_path`: path to your SD model safetensors
 - `generate_video`: set false to generate a batch of images to choose from as an initial image, then set true to switch to video mode.
-- `nsteps`: total number of images to generate
+- `nframes`: total number of frames to generate
 - `scheduler`: choose one from "euler", "euler a", "DDIM", "DDPM", "DPM++ 2M SDE Karras", "DPM++ 2M Karras"
 - `num_inference_steps`: per image
 - `lora_dict`: dictionary containing: {"[path_to_lora_safetensor]":[weight]}
-- `zoom`: zoom in(<1) or out(>1), 0.98~1.02 is a sensible value (no zooming with 1)
+- `zoom`: zoom in(<1) or out(>1)
 - `strength`: roughly the similarity between two consecutive images (0~1)
+- `diffusion_cadence`: how often a frame is directly diffused (one in every `diffusion_cadence` frames); the frames in between are linearly interpolated, which controls the smoothness of the animation
 
 then run with
 ```

diff --git a/config.json b/config.json
@@ -3,18 +3,19 @@
     "scheduler": "DPM++ 2M SDE Karras",
     "generate_video": false,
     "init_image" : "preview/your_init_image.jpg",
-    "prompt": "masterpiece, best quality, ultra-detailed, illustration, 1girl, solo, outdoors, camping, night, mountains, nature, stars, moon, tent, twin ponytails, green eyes, cheerful, happy, backpack, sleeping bag, camping stove, water bottle, mountain boots, gloves, sweater, hat, flashlight, forest, rocks, river, wood, smoke, shadows, contrast, clear sky, constellations, Milky Way, peaceful, serene, quiet, tranquil, remote, secluded, adventurous, exploration, escape, independence, survival, resourcefulness, challenge, perseverance, stamina, endurance, observation, intuition, adaptability, creativity, imagination, artistry, inspiration, beauty, awe, wonder, gratitude, appreciation, relaxation, enjoyment, rejuvenation, mindfulness, awareness, connection, harmony, balance, texture, detail, realism, depth, perspective, composition, color, light, shadow, reflection, refraction, tone, contrast, foreground, middle ground, background, naturalistic, figurative, representational, impressionistic, expressionistic, abstract, innovative, experimental, unique",
+    "prompt": "masterpiece, best quality, realistic, detailed background, the forgotten city made of white marble, grand, epic, fantasy, mystical, sunbeam, soft lighting, volumetric lighting, dramatic",
     "negative_prompt": "(worst quality:2), (low quality:2), (normal quality:2), lowres, normal quality, ((monochrome)), ((grayscale)), bad anatomy,ng_deepnegative_v1_75t,easynegative, badhandv4, text, watermark,",
     "strength":0.65,
     "num_img":6,
-    "guidance_scale":6.5,
-    "num_inference_steps":40,
-    "nsteps":80,
-    "width":768,
-    "height":512,
-    "zoom":0.98,
+    "guidance_scale":7,
+    "num_inference_steps":25,
+    "nframes":80,
+    "width":512,
+    "height":768,
+    "zoom":1.05,
     "out_dir":"preview",
     "fps":10,
+    "diffusion_cadence":2,
     "lora_dict":{
         "your_lora_1.safetensors":0.5,
         "your_lora_2.safetensors":0.8

diff --git a/preview/sample1.gif b/preview/sample1.gif
diff --git a/preview/sample2.gif b/preview/sample2.gif
diff --git a/run.py b/run.py
@@ -9,7 +9,7 @@
 import diffusers
 import argparse
 from attrdict import AttrDict
-from utils import const_schedule, zoom, export_as_gif, timestr
+from utils import const_schedule, zoom, export_as_gif, timestr, interpolate_video
 import PIL
 
 if __name__=="__main__":
@@ -24,11 +24,12 @@
     tripper.set_scheduler(schedulers[config.scheduler])
     if config.generate_video:
         config.init_image = PIL.Image.open(config.init_image)
-
         # strength schedule
-        config.strength_schedule = const_schedule(config.strength,config.nsteps)       
+        config.strength_schedule = const_schedule(config.strength,config.nframes)       
         config.transform_fn = lambda img,s: zoom(img, config.zoom)
+        config.nsteps=int(config.nframes//config.diffusion_cadence)
         imgs = tripper.generate_video(**config)
+        imgs = interpolate_video(imgs, config.diffusion_cadence)
         export_as_gif(f"{config.out_dir}/{timestr()}.gif", imgs, frames_per_second=config.fps)
     else:
         tripper.txt2img(**config)
diff --git a/utils.py b/utils.py
@@ -164,6 +164,8 @@ def zoom(im,ratio):
         nim = np.pad(im, ((m[1], m[1]), (m[0], m[0]), (0, 0)), mode='symmetric') 
         nim = Image.fromarray(nim)
         return nim.resize(s)
+    else:
+        return im
 
 def impulse_schedule(floor,ceiling,impulse,width,steps):
     x = np.arange(steps)
@@ -175,3 +177,23 @@ def impulse_schedule(floor,ceiling,impulse,width,steps):
     Y=np.array(Y).sum(0)
     print(Y.shape)
     return Y+floor
+
def interpolation(img1, img2, num_frame=1):
    """Linearly blend two images into evenly spaced in-between frames.

    Args:
        img1: Start image; anything ``np.array`` accepts (e.g. a PIL image).
        img2: End image; must convert to an array broadcastable with
            ``img1``.
        num_frame: Number of intermediate frames to produce.

    Returns:
        A list of ``num_frame`` PIL images ordered from the one closest to
        ``img1`` to the one closest to ``img2``. Neither endpoint is
        included. The list is empty when ``num_frame`` is less than 1.
    """
    start = np.array(img1, dtype=float)
    end = np.array(img2, dtype=float)
    # Per-frame increment: num_frame frames split the interval into
    # num_frame + 1 equal steps.
    step = (end - start) / (num_frame + 1)
    frames = []
    for i in range(1, num_frame + 1):
        blended = start + step * i
        # Round to nearest before the integer cast; a bare astype() truncates
        # toward zero, which would bias every in-between frame slightly darker.
        # NOTE(review): the uint8 cast assumes 8-bit input images — confirm.
        frames.append(Image.fromarray(np.rint(blended).astype(np.uint8)))
    return frames
+
def interpolate_video(imgs, cadence=2):
    """Insert blended frames between each pair of consecutive images.

    Args:
        imgs: Sequence of frames (supports ``len`` and slicing).
        cadence: Target number of output frames per input frame. Between
            every two neighbouring frames, ``cadence - 1`` interpolated
            frames are inserted.

    Returns:
        ``imgs`` itself, unchanged, when ``cadence`` is 1 or less. Otherwise
        a new list holding the original frames with the interpolated frames
        between them. An empty input yields an empty list.
    """
    if cadence <= 1:
        return imgs
    # Guard the empty case explicitly: there is no last frame to append.
    if len(imgs) == 0:
        return []
    result = []
    for current, following in zip(imgs, imgs[1:]):
        result.append(current)
        result.extend(interpolation(current, following, cadence - 1))
    # The loop emits each frame except the last one, so close the sequence.
    result.append(imgs[-1])
    return result