diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 0000000..3bd2eb8 --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,31 @@ +# This workflows will upload a Python Package using Twine when a release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +name: Upload Python Package + +on: + release: + types: [created] + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + - name: Build and publish + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + python setup.py sdist bdist_wheel + twine upload dist/* \ No newline at end of file diff --git a/README.md b/README.md index 738bf0e..4fbb0ab 100644 --- a/README.md +++ b/README.md @@ -26,40 +26,42 @@ The app is built with [Gradio](https://gradio.app/), which allows you to interac - Set the `num_walk_steps` - for testing you can use a small number like 3 or 5, but to get great results you'll want to use something larger (60-200 steps). - You can (and should) use the `name` input to separate out where the images/videos are saved. (Note that currently ffmpeg will not overwrite if you already made a video with the same name. You'll have to use ffmpeg to create the video yourself if the app fails to do so.) -### The Script +### Python Package #### Setup +Install the package + ``` -git clone https://github.com/nateraw/stable-diffusion-videos -cd stable-diffusion-videos -pip install -r requirements.txt +pip install stable_diffusion_videos ``` -#### Usage +Authenticate with Hugging Face -If you would prefer to use the `stable_diffusion_walk.py` script directly, you can do so by running: - -Run with `num_steps` set to 3 or 5 for testing, then up it to something like 60-200 for better results. 
- -```bash -python stable_diffusion_walk.py \ - --prompts "['a cat', 'a dog', 'a horse']" \ - --seeds 903,123,42 \ - --output_dir dreams \ - --name animals_test \ - --guidance_scale 8.5 \ - --num_steps 5 \ - --height 512 \ - --width 512 \ - --num_inference_steps 50 \ - --scheduler klms \ - --disable_tqdm \ - --make_video \ - --use_lerp_for_text \ - --do_loop +``` +huggingface-cli login ``` +#### Usage + +```python +from stable_diffusion_videos import walk + +walk( + prompts=['a cat', 'a dog'], + seeds=[42, 1337], + output_dir='dreams', # Where images/videos will be saved + name='animals_test', # Subdirectory of output_dir where images/videos will be saved + guidance_scale=8.5, # Higher adheres to prompt more, lower lets model take the wheel + num_steps=5, # Change to 60-200 for better results...3-5 for testing + num_inference_steps=50, + scheduler='klms', # One of: "klms", "default", "ddim" + disable_tqdm=False, # Set to True to disable tqdm progress bar + make_video=True, # If false, just save images + use_lerp_for_text=True, # Use lerp for text embeddings instead of slerp + do_loop=False, # Change to True if you want last prompt to loop back to first prompt +) +``` ## Credits diff --git a/requirements.txt b/requirements.txt index 5ee12c7..b340ccc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ transformers -git+https://github.com/huggingface/diffusers@f085d2f5c6569a1c0d90327c51328622036ef76e +diffusers==0.2.4 scipy fire gradio \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..f9c0261 --- /dev/null +++ b/setup.py @@ -0,0 +1,28 @@ +from setuptools import find_packages, setup + + +def get_version() -> str: + rel_path = "stable_diffusion_videos/__init__.py" + with open(rel_path, "r") as fp: + for line in fp.read().splitlines(): + if line.startswith("__version__"): + delim = '"' if '"' in line else "'" + return line.split(delim)[1] + raise RuntimeError("Unable to find version string.") + + +with open("requirements.txt", "r") as f: + requirements = f.read().splitlines() + +setup( + name="stable_diffusion_videos", + version=get_version(), + author="Nathan Raw", + author_email="naterawdata@gmail.com", + description=( + "Create πŸ”₯ videos with Stable Diffusion by exploring the latent space and morphing between text prompts." + ), + license="Apache", + install_requires=requirements, + packages=find_packages(), +) diff --git a/stable_diffusion_videos.ipynb b/stable_diffusion_videos.ipynb index 212ca54..7eba97f 100644 --- a/stable_diffusion_videos.ipynb +++ b/stable_diffusion_videos.ipynb @@ -5,7 +5,7 @@ "colab": { "provenance": [], "collapsed_sections": [], - "authorship_tag": "ABX9TyN/ZOFCUNqBdfOYeo31y+2Q", + "authorship_tag": "ABX9TyMDsciHN/HhWLYEdURcy00d", "include_colab_link": true }, "kernelspec": { @@ -26,7 +26,7 @@ "colab_type": "text" }, "source": [ - "\"Open" + "\"Open" ] }, { @@ -47,7 +47,7 @@ "Enjoy πŸ€—" ], "metadata": { - "id": "B4V57sVzLHu3" + "id": "z4GhhH25OdYq" } }, { @@ -56,21 +56,19 @@ "## Setup" ], "metadata": { - "id": "0L9zhmONL81f" + "id": "dvdCBpWWOhW-" } }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { - "id": "klUR9ie1DVm-" + "id": "Xwfc0ej1L9A0" }, "outputs": [], "source": [ - "%%capture\n", - "! git clone https://github.com/nateraw/stable-diffusion-videos\n", - "%cd /content/stable-diffusion-videos/\n", - "! pip install -r requirements.txt\n", + "# ! pip install stable_diffusion_videos\n", + "! 
pip install git+https://github.com/nateraw/stable-diffusion-videos@package-try-2\n", "! git config --global credential.helper store" ] }, @@ -82,7 +80,7 @@ "You have to be a registered user in πŸ€— Hugging Face Hub, and you'll also need to use an access token for the code to work. For more information on access tokens, please refer to [this section of the documentation](https://huggingface.co/docs/hub/security-tokens)." ], "metadata": { - "id": "BoTBdktZDs8B" + "id": "dR5iVGYbOky5" } }, { @@ -93,7 +91,7 @@ "notebook_login()" ], "metadata": { - "id": "8jVV1OLBDZ8o" + "id": "GmejIGhFMTXG" }, "execution_count": null, "outputs": [] @@ -104,7 +102,17 @@ "## Run the App πŸš€" ], "metadata": { - "id": "TbW39aWzIdsn" + "id": "H7UOKJhVOonb" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Optional - Connect Google Drive\n", + "\n" + ], + "metadata": { + "id": "GjfrKeeR2NQZ" } }, { @@ -115,19 +123,43 @@ "This step will take a couple minutes the first time you run it." ], "metadata": { - "id": "jCWuRT78Jt0L" + "id": "g71hslP8OntM" } }, { "cell_type": "code", "source": [ - "# in case you restarted runtime, we need to be in this directory\n", - "%cd /content/stable-diffusion-videos/\n", + "from stable_diffusion_videos import interface" + ], + "metadata": { + "id": "bgSNS368L-DV" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "#@title Connect to Google Drive to Save Outputs\n", + "\n", + "#@markdown If you want to connect Google Drive, click the checkbox below and run this cell. You'll be prompted to authenticate.\n", + "\n", + "#@markdown If you just want to save your outputs in this Colab session, don't worry about this cell\n", "\n", - "from app import interface" + "connect_google_drive = True #@param {type:\"boolean\"}\n", + "\n", + "#@markdown Then, in the interface, use this path as the `output` in the Video tab to save your videos to Google Drive:\n", + "\n", + "#@markdown > /content/gdrive/MyDrive/stable_diffusion_videos\n", + "\n", + "\n", + "if connect_google_drive:\n", + " from google.colab import drive\n", + "\n", + " drive.mount('/content/gdrive')" ], "metadata": { - "id": "a6Eey_-YDvFc" + "id": "kidtsR3c2P9Z" }, "execution_count": null, "outputs": [] @@ -147,10 +179,12 @@ "2. Generate videos using the \"Videos\" tab\n", " - Using the images you found from the step above, provide the prompts/seeds you recorded\n", " - Set the `num_walk_steps` - for testing you can use a small number like 3 or 5, but to get great results you'll want to use something larger (60-200 steps). \n", - " - You can (and should) use the `name` input to separate out where the images/videos are saved. " + " - You can (and should) use the `name` input to separate out where the images/videos are saved. \n", + "\n", + "πŸ’‘ **Pro tip** - Click the link that looks like `https://.gradio.app` below , and you'll be able to view it in full screen." ], "metadata": { - "id": "Po9vuzMnJzka" + "id": "VxjRVNnMOtgU" } }, { @@ -159,7 +193,80 @@ "interface.launch()" ], "metadata": { - "id": "fflAEZaLIYGP" + "id": "8es3_onUOL3J" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "---" + ], + "metadata": { + "id": "mFCoTvlnPi4u" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Use `walk` programatically\n", + "\n", + "The other option is to not use the interface, and instead use `walk` programatically. Here's how you would do that..." 
+ ], + "metadata": { + "id": "SjTQLCiLOWeo" + } + }, + { + "cell_type": "markdown", + "source": [ + "First we define a helper fn for visualizing videos in colab" + ], + "metadata": { + "id": "fGQPClGwOR9R" + } + }, + { + "cell_type": "code", + "source": [ + "from IPython.display import HTML\n", + "from base64 import b64encode\n", + "\n", + "def visualize_video_colab(video_path):\n", + " mp4 = open(video_path,'rb').read()\n", + " data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n", + " return HTML(\"\"\"\n", + " \n", + " \"\"\" % data_url)" + ], + "metadata": { + "id": "GqTWc8ZhNeLU" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Walk! πŸšΆβ€β™€οΈ" + ], + "metadata": { + "id": "Vd_RzwkoPM7X" + } + }, + { + "cell_type": "code", + "source": [ + "from stable_diffusion_videos import walk\n", + "\n", + "video_path = walk(['a cat', 'a dog'], [42, 1337], num_steps=3, make_video=True)\n", + "visualize_video_colab(video_path)" + ], + "metadata": { + "id": "Hv2wBZXXMQ-I" }, "execution_count": null, "outputs": [] diff --git a/stable_diffusion_videos/__init__.py b/stable_diffusion_videos/__init__.py new file mode 100644 index 0000000..f94c713 --- /dev/null +++ b/stable_diffusion_videos/__init__.py @@ -0,0 +1,118 @@ +# *********** +# vendored from https://github.com/scientific-python/lazy_loader +import importlib +import importlib.util +import inspect +import os +import sys +import types +import warnings + + +class _LazyImportWarning(Warning): + pass + + +def _attach(package_name, submodules=None, submod_attrs=None): + """Attach lazily loaded submodules, functions, or other attributes. + + Typically, modules import submodules and attributes as follows: + + ```py + import mysubmodule + import anothersubmodule + + from .foo import someattr + ``` + + The idea is to replace a package's `__getattr__`, `__dir__`, and + `__all__`, such that all imports work exactly the way they would + with normal imports, except that the import occurs upon first use. + + The typical way to call this function, replacing the above imports, is: + + ```python + __getattr__, __dir__, __all__ = lazy.attach( + __name__, + ['mysubmodule', 'anothersubmodule'], + {'foo': ['someattr']} + ) + ``` + This functionality requires Python 3.7 or higher. + + Args: + package_name (`str`): + Typically use `__name__`. + submodules (`set`): + List of submodules to attach. + submod_attrs (`dict`): + Dictionary of submodule -> list of attributes / functions. + These attributes are imported as they are used. + + Returns: + __getattr__, __dir__, __all__ + + """ + if submod_attrs is None: + submod_attrs = {} + + if submodules is None: + submodules = set() + else: + submodules = set(submodules) + + attr_to_modules = { + attr: mod for mod, attrs in submod_attrs.items() for attr in attrs + } + + __all__ = list(submodules | attr_to_modules.keys()) + + def __getattr__(name): + if name in submodules: + return importlib.import_module(f"{package_name}.{name}") + elif name in attr_to_modules: + submod_path = f"{package_name}.{attr_to_modules[name]}" + submod = importlib.import_module(submod_path) + attr = getattr(submod, name) + + # If the attribute lives in a file (module) with the same + # name as the attribute, ensure that the attribute and *not* + # the module is accessible on the package. 
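+            # (For example, if an attribute `walk` were defined in a module
+            # `walk.py`, then `package.walk` should resolve to the function,
+            # not to the submodule — a hypothetical case; none of the mappings
+            # registered at the bottom of this file collide like that.)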
+ if name == attr_to_modules[name]: + pkg = sys.modules[package_name] + pkg.__dict__[name] = attr + + return attr + else: + raise AttributeError(f"No {package_name} attribute {name}") + + def __dir__(): + return __all__ + + if os.environ.get("EAGER_IMPORT", ""): + for attr in set(attr_to_modules.keys()) | submodules: + __getattr__(attr) + + return __getattr__, __dir__, list(__all__) + + + +__getattr__, __dir__, __all__ = _attach( + __name__, + submodules=[], + submod_attrs={ + "commands.user": ["notebook_login"], + "app": [ + "interface", + ], + "stable_diffusion_pipeline": [ + "StableDiffusionPipeline", + ], + "stable_diffusion_walk": [ + "walk", + "SCHEDULERS", + ] + }, +) + +__version__ = "0.1.1" \ No newline at end of file diff --git a/app.py b/stable_diffusion_videos/app.py similarity index 89% rename from app.py rename to stable_diffusion_videos/app.py index a2f29e9..725e3a2 100644 --- a/app.py +++ b/stable_diffusion_videos/app.py @@ -1,7 +1,9 @@ +import time + import gradio as gr import torch -from stable_diffusion_walk import SCHEDULERS, pipeline, walk +from .stable_diffusion_walk import SCHEDULERS, pipeline, walk def fn_images( @@ -35,7 +37,7 @@ def fn_videos( do_loop, disable_tqdm, use_lerp_for_text, - name, + output_dir, ): prompts = [prompt_1, prompt_2] seeds = [seed_1, seed_2] @@ -52,7 +54,8 @@ def fn_videos( num_steps=num_walk_steps, num_inference_steps=num_inference_steps, use_lerp_for_text=use_lerp_for_text, - name=name, + output_dir=output_dir, + name=time.strftime("%Y%m%d-%H%M%S"), scheduler=scheduler, disable_tqdm=disable_tqdm, ) @@ -74,9 +77,9 @@ def fn_videos( gr.Checkbox(False), gr.Checkbox(False), gr.Textbox( - "stable_diffusion_video", + "dreams", placeholder=( - "Name of this experiment. Change to avoid overwriting previous outputs" + "Folder where outputs will be saved. Each output will be saved in a new folder." ), ), ], diff --git a/stable_diffusion_pipeline.py b/stable_diffusion_videos/stable_diffusion_pipeline.py similarity index 93% rename from stable_diffusion_pipeline.py rename to stable_diffusion_videos/stable_diffusion_pipeline.py index 21068ef..c147a13 100644 --- a/stable_diffusion_pipeline.py +++ b/stable_diffusion_videos/stable_diffusion_pipeline.py @@ -1,5 +1,6 @@ import inspect import warnings +from tqdm.auto import tqdm from typing import List, Optional, Union import torch @@ -189,7 +190,7 @@ def __call__( # scale and decode the image latents with vae latents = 1 / 0.18215 * latents - image = self.vae.decode(latents).sample + image = self.vae.decode(latents) image = (image / 2 + 0.5).clamp(0, 1) image = image.cpu().permute(0, 2, 3, 1).numpy() @@ -219,3 +220,16 @@ def embed_text(self, text): with torch.no_grad(): embed = self.text_encoder(text_input.input_ids.to(self.device))[0] return embed + + def progress_bar(self, iterable): + if not hasattr(self, "_progress_bar_config"): + self._progress_bar_config = {} + elif not isinstance(self._progress_bar_config, dict): + raise ValueError( + f"`self._progress_bar_config` should be of type `dict`, but is {type(self._progress_bar_config)}." 
+ ) + + return tqdm(iterable, **self._progress_bar_config) + + def set_progress_bar_config(self, **kwargs): + self._progress_bar_config = kwargs \ No newline at end of file diff --git a/stable_diffusion_walk.py b/stable_diffusion_videos/stable_diffusion_walk.py similarity index 98% rename from stable_diffusion_walk.py rename to stable_diffusion_videos/stable_diffusion_walk.py index 5a22c03..d742b85 100644 --- a/stable_diffusion_walk.py +++ b/stable_diffusion_videos/stable_diffusion_walk.py @@ -6,7 +6,7 @@ from diffusers.schedulers import (DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler) -from stable_diffusion_pipeline import StableDiffusionPipeline +from .stable_diffusion_pipeline import StableDiffusionPipeline pipeline = StableDiffusionPipeline.from_pretrained( "CompVis/stable-diffusion-v1-4",
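
The `progress_bar` / `set_progress_bar_config` hooks added to the vendored `StableDiffusionPipeline` above are defined in this diff but never demonstrated. Below is a minimal sketch of how a caller could drive them; it assumes the module-level `pipeline` object instantiated in `stable_diffusion_videos/stable_diffusion_walk.py` (importing that module runs the `from_pretrained` call shown above, so the Hugging Face authentication from the README setup step must already be done).

```python
from stable_diffusion_videos.stable_diffusion_walk import pipeline

# Kwargs passed here are stored on the pipeline and splatted into
# tqdm(iterable, **config) by the new progress_bar() helper, so anything
# tqdm accepts (desc, leave, disable, ...) works. For example, silence the
# bar entirely while rendering many interpolation frames:
pipeline.set_progress_bar_config(disable=True)

# ...or re-enable it later with a custom description:
pipeline.set_progress_bar_config(disable=False, desc="denoising")
```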