diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
new file mode 100644
index 0000000..3bd2eb8
--- /dev/null
+++ b/.github/workflows/python-publish.yml
@@ -0,0 +1,31 @@
+# This workflow will upload a Python Package using Twine when a release is created
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+
+name: Upload Python Package
+
+on:
+ release:
+ types: [created]
+
+jobs:
+ deploy:
+
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v2
+ - name: Set up Python
+ uses: actions/setup-python@v2
+ with:
+ python-version: '3.x'
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install setuptools wheel twine
+ - name: Build and publish
+ env:
+ TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+ TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
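+        # Assumed setup: PYPI_USERNAME / PYPI_PASSWORD are defined under the repository's
+        # Actions secrets; when authenticating with a PyPI API token, use "__token__" as the username.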
+ run: |
+ python setup.py sdist bdist_wheel
+ twine upload dist/*
\ No newline at end of file
diff --git a/README.md b/README.md
index 738bf0e..4fbb0ab 100644
--- a/README.md
+++ b/README.md
@@ -26,40 +26,42 @@ The app is built with [Gradio](https://gradio.app/), which allows you to interac
- Set the `num_walk_steps` - for testing you can use a small number like 3 or 5, but to get great results you'll want to use something larger (60-200 steps).
- You can (and should) use the `name` input to separate out where the images/videos are saved. (Note that currently ffmpeg will not overwrite if you already made a video with the same name. You'll have to use ffmpeg to create the video yourself if the app fails to do so.)
-### The Script
+### Python Package
#### Setup
+Install the package:
+
```
-git clone https://github.com/nateraw/stable-diffusion-videos
-cd stable-diffusion-videos
-pip install -r requirements.txt
+pip install stable_diffusion_videos
```
-#### Usage
+Authenticate with Hugging Face (needed to download the model weights from the Hub):
-If you would prefer to use the `stable_diffusion_walk.py` script directly, you can do so by running:
-
-Run with `num_steps` set to 3 or 5 for testing, then up it to something like 60-200 for better results.
-
-```bash
-python stable_diffusion_walk.py \
- --prompts "['a cat', 'a dog', 'a horse']" \
- --seeds 903,123,42 \
- --output_dir dreams \
- --name animals_test \
- --guidance_scale 8.5 \
- --num_steps 5 \
- --height 512 \
- --width 512 \
- --num_inference_steps 50 \
- --scheduler klms \
- --disable_tqdm \
- --make_video \
- --use_lerp_for_text \
- --do_loop
+```
+huggingface-cli login
```
+#### Usage
+
+```python
+from stable_diffusion_videos import walk
+
+walk(
+ prompts=['a cat', 'a dog'],
+ seeds=[42, 1337],
+ output_dir='dreams', # Where images/videos will be saved
+ name='animals_test', # Subdirectory of output_dir where images/videos will be saved
+ guidance_scale=8.5, # Higher adheres to prompt more, lower lets model take the wheel
+ num_steps=5, # Change to 60-200 for better results...3-5 for testing
+ num_inference_steps=50,
+ scheduler='klms', # One of: "klms", "default", "ddim"
+ disable_tqdm=False, # Set to True to disable tqdm progress bar
+ make_video=True, # If false, just save images
+ use_lerp_for_text=True, # Use lerp for text embeddings instead of slerp
+ do_loop=False, # Change to True if you want last prompt to loop back to first prompt
+)
+```
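+
+When `make_video=True`, `walk` returns the path to the rendered mp4 (this is how the example notebook uses it), so a minimal sketch of an end-to-end call looks like:
+
+```python
+from stable_diffusion_videos import walk
+
+video_path = walk(prompts=['a cat', 'a dog'], seeds=[42, 1337], num_steps=3, make_video=True)
+print(video_path)  # where the mp4 was written
+```
+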
## Credits
diff --git a/requirements.txt b/requirements.txt
index 5ee12c7..b340ccc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
transformers
-git+https://github.com/huggingface/diffusers@f085d2f5c6569a1c0d90327c51328622036ef76e
+diffusers==0.2.4
scipy
fire
gradio
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..f9c0261
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,28 @@
+from setuptools import find_packages, setup
+
+
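+# Parse __version__ out of stable_diffusion_videos/__init__.py so setup.py never has to
+# import the package (and its dependencies) at build time.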
+def get_version() -> str:
+ rel_path = "stable_diffusion_videos/__init__.py"
+ with open(rel_path, "r") as fp:
+ for line in fp.read().splitlines():
+ if line.startswith("__version__"):
+ delim = '"' if '"' in line else "'"
+ return line.split(delim)[1]
+ raise RuntimeError("Unable to find version string.")
+
+
+with open("requirements.txt", "r") as f:
+ requirements = f.read().splitlines()
+
+setup(
+ name="stable_diffusion_videos",
+ version=get_version(),
+ author="Nathan Raw",
+ author_email="naterawdata@gmail.com",
+ description=(
+        "Create 🔥 videos with Stable Diffusion by exploring the latent space and morphing between text prompts."
+ ),
+ license="Apache",
+ install_requires=requirements,
+ packages=find_packages(),
+)
diff --git a/stable_diffusion_videos.ipynb b/stable_diffusion_videos.ipynb
index 212ca54..7eba97f 100644
--- a/stable_diffusion_videos.ipynb
+++ b/stable_diffusion_videos.ipynb
@@ -5,7 +5,7 @@
"colab": {
"provenance": [],
"collapsed_sections": [],
- "authorship_tag": "ABX9TyN/ZOFCUNqBdfOYeo31y+2Q",
+ "authorship_tag": "ABX9TyMDsciHN/HhWLYEdURcy00d",
"include_colab_link": true
},
"kernelspec": {
@@ -26,7 +26,7 @@
"colab_type": "text"
},
"source": [
- ""
+ ""
]
},
{
@@ -47,7 +47,7 @@
"Enjoy π€"
],
"metadata": {
- "id": "B4V57sVzLHu3"
+ "id": "z4GhhH25OdYq"
}
},
{
@@ -56,21 +56,19 @@
"## Setup"
],
"metadata": {
- "id": "0L9zhmONL81f"
+ "id": "dvdCBpWWOhW-"
}
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {
- "id": "klUR9ie1DVm-"
+ "id": "Xwfc0ej1L9A0"
},
"outputs": [],
"source": [
- "%%capture\n",
- "! git clone https://github.com/nateraw/stable-diffusion-videos\n",
- "%cd /content/stable-diffusion-videos/\n",
- "! pip install -r requirements.txt\n",
+ "# ! pip install stable_diffusion_videos\n",
+ "! pip install git+https://github.com/nateraw/stable-diffusion-videos@package-try-2\n",
"! git config --global credential.helper store"
]
},
@@ -82,7 +80,7 @@
"You have to be a registered user in π€ Hugging Face Hub, and you'll also need to use an access token for the code to work. For more information on access tokens, please refer to [this section of the documentation](https://huggingface.co/docs/hub/security-tokens)."
],
"metadata": {
- "id": "BoTBdktZDs8B"
+ "id": "dR5iVGYbOky5"
}
},
{
@@ -93,7 +91,7 @@
"notebook_login()"
],
"metadata": {
- "id": "8jVV1OLBDZ8o"
+ "id": "GmejIGhFMTXG"
},
"execution_count": null,
"outputs": []
@@ -104,7 +102,17 @@
"## Run the App π"
],
"metadata": {
- "id": "TbW39aWzIdsn"
+ "id": "H7UOKJhVOonb"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### Optional - Connect Google Drive\n",
+ "\n"
+ ],
+ "metadata": {
+ "id": "GjfrKeeR2NQZ"
}
},
{
@@ -115,19 +123,43 @@
"This step will take a couple minutes the first time you run it."
],
"metadata": {
- "id": "jCWuRT78Jt0L"
+ "id": "g71hslP8OntM"
}
},
{
"cell_type": "code",
"source": [
- "# in case you restarted runtime, we need to be in this directory\n",
- "%cd /content/stable-diffusion-videos/\n",
+ "from stable_diffusion_videos import interface"
+ ],
+ "metadata": {
+ "id": "bgSNS368L-DV"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Connect to Google Drive to Save Outputs\n",
+ "\n",
+ "#@markdown If you want to connect Google Drive, click the checkbox below and run this cell. You'll be prompted to authenticate.\n",
+ "\n",
+ "#@markdown If you just want to save your outputs in this Colab session, don't worry about this cell\n",
"\n",
- "from app import interface"
+ "connect_google_drive = True #@param {type:\"boolean\"}\n",
+ "\n",
+ "#@markdown Then, in the interface, use this path as the `output` in the Video tab to save your videos to Google Drive:\n",
+ "\n",
+ "#@markdown > /content/gdrive/MyDrive/stable_diffusion_videos\n",
+ "\n",
+ "\n",
+ "if connect_google_drive:\n",
+ " from google.colab import drive\n",
+ "\n",
+ " drive.mount('/content/gdrive')"
],
"metadata": {
- "id": "a6Eey_-YDvFc"
+ "id": "kidtsR3c2P9Z"
},
"execution_count": null,
"outputs": []
@@ -147,10 +179,12 @@
"2. Generate videos using the \"Videos\" tab\n",
" - Using the images you found from the step above, provide the prompts/seeds you recorded\n",
" - Set the `num_walk_steps` - for testing you can use a small number like 3 or 5, but to get great results you'll want to use something larger (60-200 steps). \n",
- " - You can (and should) use the `name` input to separate out where the images/videos are saved. "
+ " - You can (and should) use the `name` input to separate out where the images/videos are saved. \n",
+ "\n",
+        "💡 **Pro tip** - Click the link that looks like `https://.gradio.app` below, and you'll be able to view it in full screen."
],
"metadata": {
- "id": "Po9vuzMnJzka"
+ "id": "VxjRVNnMOtgU"
}
},
{
@@ -159,7 +193,80 @@
"interface.launch()"
],
"metadata": {
- "id": "fflAEZaLIYGP"
+ "id": "8es3_onUOL3J"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "---"
+ ],
+ "metadata": {
+ "id": "mFCoTvlnPi4u"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+        "## Use `walk` programmatically\n",
+ "\n",
+        "The other option is to skip the interface and use `walk` programmatically. Here's how you would do that..."
+ ],
+ "metadata": {
+ "id": "SjTQLCiLOWeo"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+        "First, we define a helper function for visualizing videos in Colab."
+ ],
+ "metadata": {
+ "id": "fGQPClGwOR9R"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from IPython.display import HTML\n",
+ "from base64 import b64encode\n",
+ "\n",
+ "def visualize_video_colab(video_path):\n",
+ " mp4 = open(video_path,'rb').read()\n",
+ " data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
+ " return HTML(\"\"\"\n",
+        "  <video width=400 controls><source src=\"%s\" type=\"video/mp4\"></video>\n",
+ " \"\"\" % data_url)"
+ ],
+ "metadata": {
+ "id": "GqTWc8ZhNeLU"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+        "Walk! 🚶‍♀️"
+ ],
+ "metadata": {
+ "id": "Vd_RzwkoPM7X"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from stable_diffusion_videos import walk\n",
+ "\n",
+ "video_path = walk(['a cat', 'a dog'], [42, 1337], num_steps=3, make_video=True)\n",
+ "visualize_video_colab(video_path)"
+ ],
+ "metadata": {
+ "id": "Hv2wBZXXMQ-I"
},
"execution_count": null,
"outputs": []
diff --git a/stable_diffusion_videos/__init__.py b/stable_diffusion_videos/__init__.py
new file mode 100644
index 0000000..f94c713
--- /dev/null
+++ b/stable_diffusion_videos/__init__.py
@@ -0,0 +1,118 @@
+# ***********
+# vendored from https://github.com/scientific-python/lazy_loader
+import importlib
+import importlib.util
+import inspect
+import os
+import sys
+import types
+import warnings
+
+
+class _LazyImportWarning(Warning):
+ pass
+
+
+def _attach(package_name, submodules=None, submod_attrs=None):
+ """Attach lazily loaded submodules, functions, or other attributes.
+
+ Typically, modules import submodules and attributes as follows:
+
+ ```py
+ import mysubmodule
+ import anothersubmodule
+
+ from .foo import someattr
+ ```
+
+ The idea is to replace a package's `__getattr__`, `__dir__`, and
+ `__all__`, such that all imports work exactly the way they would
+ with normal imports, except that the import occurs upon first use.
+
+ The typical way to call this function, replacing the above imports, is:
+
+ ```python
+    __getattr__, __dir__, __all__ = _attach(
+ __name__,
+ ['mysubmodule', 'anothersubmodule'],
+ {'foo': ['someattr']}
+ )
+ ```
+ This functionality requires Python 3.7 or higher.
+
+ Args:
+ package_name (`str`):
+ Typically use `__name__`.
+ submodules (`set`):
+ List of submodules to attach.
+ submod_attrs (`dict`):
+ Dictionary of submodule -> list of attributes / functions.
+ These attributes are imported as they are used.
+
+ Returns:
+ __getattr__, __dir__, __all__
+
+ """
+ if submod_attrs is None:
+ submod_attrs = {}
+
+ if submodules is None:
+ submodules = set()
+ else:
+ submodules = set(submodules)
+
+ attr_to_modules = {
+ attr: mod for mod, attrs in submod_attrs.items() for attr in attrs
+ }
+
+ __all__ = list(submodules | attr_to_modules.keys())
+
+ def __getattr__(name):
+ if name in submodules:
+ return importlib.import_module(f"{package_name}.{name}")
+ elif name in attr_to_modules:
+ submod_path = f"{package_name}.{attr_to_modules[name]}"
+ submod = importlib.import_module(submod_path)
+ attr = getattr(submod, name)
+
+ # If the attribute lives in a file (module) with the same
+ # name as the attribute, ensure that the attribute and *not*
+ # the module is accessible on the package.
+ if name == attr_to_modules[name]:
+ pkg = sys.modules[package_name]
+ pkg.__dict__[name] = attr
+
+ return attr
+ else:
+ raise AttributeError(f"No {package_name} attribute {name}")
+
+ def __dir__():
+ return __all__
+
+ if os.environ.get("EAGER_IMPORT", ""):
+ for attr in set(attr_to_modules.keys()) | submodules:
+ __getattr__(attr)
+
+ return __getattr__, __dir__, list(__all__)
+
+
+
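+# Lazily expose the public API: `import stable_diffusion_videos` stays cheap, and the
+# submodules below (with their torch/diffusers imports) are only loaded the first time
+# an attribute such as `walk` or `interface` is accessed.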
+__getattr__, __dir__, __all__ = _attach(
+ __name__,
+ submodules=[],
+ submod_attrs={
+ "commands.user": ["notebook_login"],
+ "app": [
+ "interface",
+ ],
+ "stable_diffusion_pipeline": [
+ "StableDiffusionPipeline",
+ ],
+ "stable_diffusion_walk": [
+ "walk",
+ "SCHEDULERS",
+ ]
+ },
+)
+
+__version__ = "0.1.1"
\ No newline at end of file
diff --git a/app.py b/stable_diffusion_videos/app.py
similarity index 89%
rename from app.py
rename to stable_diffusion_videos/app.py
index a2f29e9..725e3a2 100644
--- a/app.py
+++ b/stable_diffusion_videos/app.py
@@ -1,7 +1,9 @@
+import time
+
import gradio as gr
import torch
-from stable_diffusion_walk import SCHEDULERS, pipeline, walk
+from .stable_diffusion_walk import SCHEDULERS, pipeline, walk
def fn_images(
@@ -35,7 +37,7 @@ def fn_videos(
do_loop,
disable_tqdm,
use_lerp_for_text,
- name,
+ output_dir,
):
prompts = [prompt_1, prompt_2]
seeds = [seed_1, seed_2]
@@ -52,7 +54,8 @@ def fn_videos(
num_steps=num_walk_steps,
num_inference_steps=num_inference_steps,
use_lerp_for_text=use_lerp_for_text,
- name=name,
+ output_dir=output_dir,
+ name=time.strftime("%Y%m%d-%H%M%S"),
scheduler=scheduler,
disable_tqdm=disable_tqdm,
)
@@ -74,9 +77,9 @@ def fn_videos(
gr.Checkbox(False),
gr.Checkbox(False),
gr.Textbox(
- "stable_diffusion_video",
+ "dreams",
placeholder=(
- "Name of this experiment. Change to avoid overwriting previous outputs"
+                "Folder where outputs will be saved. Each run is saved to its own timestamped subfolder."
),
),
],
diff --git a/stable_diffusion_pipeline.py b/stable_diffusion_videos/stable_diffusion_pipeline.py
similarity index 93%
rename from stable_diffusion_pipeline.py
rename to stable_diffusion_videos/stable_diffusion_pipeline.py
index 21068ef..c147a13 100644
--- a/stable_diffusion_pipeline.py
+++ b/stable_diffusion_videos/stable_diffusion_pipeline.py
@@ -1,5 +1,6 @@
import inspect
import warnings
+from tqdm.auto import tqdm
from typing import List, Optional, Union
import torch
@@ -189,7 +190,7 @@ def __call__(
# scale and decode the image latents with vae
latents = 1 / 0.18215 * latents
- image = self.vae.decode(latents).sample
+ image = self.vae.decode(latents)
image = (image / 2 + 0.5).clamp(0, 1)
image = image.cpu().permute(0, 2, 3, 1).numpy()
@@ -219,3 +220,16 @@ def embed_text(self, text):
with torch.no_grad():
embed = self.text_encoder(text_input.input_ids.to(self.device))[0]
return embed
+
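+    # tqdm helpers: callers can configure or silence the sampling progress bar,
+    # e.g. `pipeline.set_progress_bar_config(disable=True)` on the pipeline instance.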
+ def progress_bar(self, iterable):
+ if not hasattr(self, "_progress_bar_config"):
+ self._progress_bar_config = {}
+ elif not isinstance(self._progress_bar_config, dict):
+ raise ValueError(
+ f"`self._progress_bar_config` should be of type `dict`, but is {type(self._progress_bar_config)}."
+ )
+
+ return tqdm(iterable, **self._progress_bar_config)
+
+ def set_progress_bar_config(self, **kwargs):
+ self._progress_bar_config = kwargs
\ No newline at end of file
diff --git a/stable_diffusion_walk.py b/stable_diffusion_videos/stable_diffusion_walk.py
similarity index 98%
rename from stable_diffusion_walk.py
rename to stable_diffusion_videos/stable_diffusion_walk.py
index 5a22c03..d742b85 100644
--- a/stable_diffusion_walk.py
+++ b/stable_diffusion_videos/stable_diffusion_walk.py
@@ -6,7 +6,7 @@
from diffusers.schedulers import (DDIMScheduler, LMSDiscreteScheduler,
PNDMScheduler)
-from stable_diffusion_pipeline import StableDiffusionPipeline
+from .stable_diffusion_pipeline import StableDiffusionPipeline
pipeline = StableDiffusionPipeline.from_pretrained(
"CompVis/stable-diffusion-v1-4",