Commit a32162d

Merge pull request #204 from Dartvauder/dev
Dev
2 parents 33c302e + ab08868 commit a32162d

3 files changed: +99 −10 lines changed
LaunchFiles/appEN.py

Lines changed: 80 additions & 2 deletions
@@ -12,6 +12,9 @@
 cache_dir = os.path.join("cache")
 os.makedirs(cache_dir, exist_ok=True)
 os.environ["XDG_CACHE_HOME"] = cache_dir
+temp_dir = os.path.join("temp")
+os.makedirs(temp_dir, exist_ok=True)
+os.environ["TMPDIR"] = temp_dir
 import gradio as gr
 import langdetect
 from datasets import load_dataset, Audio
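The three added lines route temporary files into a project-local folder. A minimal standalone sketch (not part of the commit) of the mechanism, using only the standard library:

```python
# Minimal sketch: Python's tempfile module resolves its default directory from
# the TMPDIR environment variable, so setting it before temporary files are
# first created keeps scratch data inside the local "temp" folder.
import os

temp_dir = os.path.join("temp")
os.makedirs(temp_dir, exist_ok=True)
os.environ["TMPDIR"] = temp_dir

import tempfile  # used after TMPDIR is set, so the new location is picked up

print(tempfile.gettempdir())               # expected to point at ./temp
with tempfile.NamedTemporaryFile() as tmp:
    print(tmp.name)                        # scratch file created under ./temp
```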
@@ -102,6 +105,7 @@ def wrapper():
 
 # Diffusers import
 diffusers = lazy_import('diffusers', '')
+BlipDiffusionPipeline = lazy_import('diffusers.pipelines', 'BlipDiffusionPipeline')
 StableDiffusionPipeline = lazy_import('diffusers', 'StableDiffusionPipeline')
 StableDiffusion3Pipeline = lazy_import('diffusers', 'StableDiffusion3Pipeline')
 StableDiffusionXLPipeline = lazy_import('diffusers', 'StableDiffusionXLPipeline')
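The `lazy_import` helper itself is defined elsewhere in appEN.py and is not shown in this diff. A hypothetical sketch consistent with how it is called here, where the returned wrapper imports the module on first call and returns it (which is why pipelines are later resolved as `BlipDiffusionPipeline().BlipDiffusionPipeline.from_pretrained(...)`):

```python
# Hypothetical sketch of a lazy_import helper matching the calling convention
# used in this file; not the project's actual implementation.
import importlib

def lazy_import(module_name, attr_name):
    # attr_name is kept only to mirror the call signature used above.
    cached = None

    def wrapper():
        nonlocal cached
        if cached is None:
            cached = importlib.import_module(module_name)  # import on first use
        return cached

    return wrapper

BlipDiffusionPipeline = lazy_import('diffusers.pipelines', 'BlipDiffusionPipeline')
# pipeline_cls = BlipDiffusionPipeline().BlipDiffusionPipeline  # resolved lazily
```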
@@ -3380,6 +3384,54 @@ def generate_image_diffedit(source_prompt, source_negative_prompt, target_prompt
 flush()
 
 
+def generate_image_blip_diffusion(text_prompt_input, negative_prompt, cond_image, cond_subject, tgt_subject,
+                                  num_inference_steps, guidance_scale, height, width, output_format):
+    blip_diffusion_path = os.path.join("inputs", "image", "sd_models", "blip-diff")
+
+    if not os.path.exists(blip_diffusion_path):
+        print("Downloading BlipDiffusion model...")
+        os.makedirs(blip_diffusion_path, exist_ok=True)
+        Repo.clone_from("https://huggingface.co/Salesforce/blipdiffusion", blip_diffusion_path)
+        print("BlipDiffusion model downloaded")
+
+    try:
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        blip_diffusion_pipe = BlipDiffusionPipeline().BlipDiffusionPipeline.from_pretrained(
+            blip_diffusion_path, torch_dtype=torch.float16
+        ).to(device)
+
+        cond_image = Image.open(cond_image).convert("RGB")
+
+        output = blip_diffusion_pipe(
+            text_prompt_input,
+            cond_image,
+            cond_subject,
+            tgt_subject,
+            guidance_scale=guidance_scale,
+            num_inference_steps=num_inference_steps,
+            negative_prompt=negative_prompt,
+            height=height,
+            width=width,
+        ).images
+
+        today = datetime.now().date()
+        image_dir = os.path.join('outputs', f"BlipDiffusion_{today.strftime('%Y%m%d')}")
+        os.makedirs(image_dir, exist_ok=True)
+        image_filename = f"blip_diffusion_{datetime.now().strftime('%Y%m%d_%H%M%S')}.{output_format}"
+        image_path = os.path.join(image_dir, image_filename)
+
+        output[0].save(image_path, format=output_format.upper())
+
+        return image_path, "Image generated successfully."
+
+    except Exception as e:
+        return None, str(e)
+
+    finally:
+        del blip_diffusion_pipe
+        flush()
+
+
 def generate_image_animatediff(prompt, negative_prompt, input_video, strength, model_type, stable_diffusion_model_name, seed, motion_lora_name, num_frames, num_inference_steps,
                                guidance_scale, width, height, clip_skip):
 
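For comparison with the wrapper above, a minimal standalone sketch of calling BLIP-Diffusion through diffusers directly, based on the library's documented example for `Salesforce/blipdiffusion`. The image URL, prompts, and subjects are illustrative, and note that the documented negative-prompt keyword in the pipeline is `neg_prompt`:

```python
# Standalone sketch (not part of the commit), following the diffusers docs for
# Salesforce/blipdiffusion. A CUDA GPU is assumed (use float32 on CPU).
import torch
from diffusers.pipelines import BlipDiffusionPipeline
from diffusers.utils import load_image

pipe = BlipDiffusionPipeline.from_pretrained(
    "Salesforce/blipdiffusion", torch_dtype=torch.float16
).to("cuda")

# Illustrative conditioning image of the source subject.
cond_image = load_image(
    "https://huggingface.co/datasets/ayushtues/blipdiffusion_images/resolve/main/dog.jpg"
)

images = pipe(
    "swimming underwater",   # text prompt describing the target scene
    cond_image,              # conditioning image of the source subject
    "dog",                   # conditioning (source) subject
    "dog",                   # target subject
    guidance_scale=7.5,
    num_inference_steps=25,
    neg_prompt="lowres, cropped, worst quality, low quality",
    height=512,
    width=512,
).images

images[0].save("blip_diffusion_example.png")
```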
@@ -8550,6 +8602,32 @@ def reload_interface():
     submit_btn="Generate"
 )
 
+blip_diffusion_interface = gr.Interface(
+    fn=generate_image_blip_diffusion,
+    inputs=[
+        gr.Textbox(label="Prompt"),
+        gr.Textbox(label="Negative Prompt", value=""),
+        gr.Image(label="Conditioning Image", type="filepath"),
+        gr.Textbox(label="Conditioning Subject"),
+        gr.Textbox(label="Target Subject"),
+        gr.Slider(minimum=1, maximum=100, value=30, step=1, label="Inference Steps"),
+        gr.Slider(minimum=0.1, maximum=30.0, value=8, step=0.1, label="Guidance Scale"),
+        gr.Slider(minimum=64, maximum=2048, value=512, step=64, label="Height"),
+        gr.Slider(minimum=64, maximum=2048, value=512, step=64, label="Width"),
+        gr.Radio(choices=["png", "jpeg"], label="Output Format", value="png")
+    ],
+    outputs=[
+        gr.Image(type="filepath", label="Generated Image"),
+        gr.Textbox(label="Message")
+    ],
+    title="NeuroSandboxWebUI - BlipDiffusion",
+    description="This interface allows you to generate images using BlipDiffusion. Upload a conditioning image, provide text prompts and subjects, and customize generation parameters.",
+    allow_flagging="never",
+    clear_btn=None,
+    stop_btn="Stop",
+    submit_btn="Generate"
+)
+
 animatediff_interface = gr.Interface(
     fn=generate_image_animatediff,
     inputs=[
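A stripped-down sketch (with a hypothetical stub function, not from the commit) showing how a function returning an `(image_path, message)` tuple maps positionally onto the two output components used above, runnable on its own:

```python
# Minimal sketch: two return values pair with the two output components.
import gradio as gr

def fake_generate(prompt):  # hypothetical stand-in for generate_image_blip_diffusion
    return None, f"Would generate an image for: {prompt}"

demo = gr.Interface(
    fn=fake_generate,
    inputs=gr.Textbox(label="Prompt"),
    outputs=[
        gr.Image(type="filepath", label="Generated Image"),
        gr.Textbox(label="Message"),
    ],
    allow_flagging="never",
)

if __name__ == "__main__":
    demo.launch()
```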
@@ -10301,11 +10379,11 @@ def reload_interface():
 gr.TabbedInterface(
     [
         gr.TabbedInterface(
-            [txt2img_interface, img2img_interface, depth2img_interface, marigold_interface, pix2pix_interface, controlnet_interface, latent_upscale_interface, supir_upscale_interface, sdxl_refiner_interface, inpaint_interface, outpaint_interface, gligen_interface, diffedit_interface, animatediff_interface, hotshotxl_interface, video_interface, ldm3d_interface,
+            [txt2img_interface, img2img_interface, depth2img_interface, marigold_interface, pix2pix_interface, controlnet_interface, latent_upscale_interface, supir_upscale_interface, sdxl_refiner_interface, inpaint_interface, outpaint_interface, gligen_interface, diffedit_interface, blip_diffusion_interface, animatediff_interface, hotshotxl_interface, video_interface, ldm3d_interface,
             gr.TabbedInterface([sd3_txt2img_interface, sd3_img2img_interface, sd3_controlnet_interface, sd3_inpaint_interface],
                                tab_names=["txt2img", "img2img", "controlnet", "inpaint"]),
             cascade_interface, t2i_ip_adapter_interface, ip_adapter_faceid_interface, riffusion_interface],
-            tab_names=["txt2img", "img2img", "depth2img", "marigold", "pix2pix", "controlnet", "upscale(latent)", "upscale(SUPIR)", "refiner", "inpaint", "outpaint", "gligen", "diffedit", "animatediff", "hotshotxl", "video", "ldm3d", "sd3", "cascade", "t2i-ip-adapter", "ip-adapter-faceid", "riffusion"]
+            tab_names=["txt2img", "img2img", "depth2img", "marigold", "pix2pix", "controlnet", "upscale(latent)", "upscale(SUPIR)", "refiner", "inpaint", "outpaint", "gligen", "diffedit", "blip-diffusion", "animatediff", "hotshotxl", "video", "ldm3d", "sd3", "cascade", "t2i-ip-adapter", "ip-adapter-faceid", "riffusion"]
         ),
     kandinsky_interface, flux_interface, hunyuandit_interface, lumina_interface, kolors_interface, auraflow_interface, wurstchen_interface, deepfloyd_if_interface, pixart_interface, playgroundv2_interface
     ],
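The nesting relies on `gr.TabbedInterface` accepting other Blocks, including another `TabbedInterface`, as entries, with each entry pairing positionally with a name in `tab_names`. A minimal sketch with hypothetical placeholder interfaces:

```python
# Minimal sketch (not from the commit) of the nested TabbedInterface pattern.
import gradio as gr

def echo(text):  # hypothetical placeholder function
    return text

txt_a = gr.Interface(fn=echo, inputs="text", outputs="text")
txt_b = gr.Interface(fn=echo, inputs="text", outputs="text")
txt_c = gr.Interface(fn=echo, inputs="text", outputs="text")

inner = gr.TabbedInterface([txt_a, txt_b], tab_names=["first", "second"])
outer = gr.TabbedInterface([inner, txt_c], tab_names=["group", "single"])

if __name__ == "__main__":
    outer.launch()
```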

README.md

Lines changed: 4 additions & 3 deletions
@@ -12,7 +12,7 @@ The goal of the project - to create the easiest possible application to use neur
 
 ### Text: <img width="1118" alt="1" src="https://github.com/user-attachments/assets/d0947d54-eb8b-4f20-986b-579f9652ff95">
 
-### Image: <img width="1118" alt="2" src="https://github.com/user-attachments/assets/39506653-23e1-432b-b250-362146a693a5">
+### Image: <img width="1112" alt="2" src="https://github.com/user-attachments/assets/02085575-1ae3-4e71-93eb-499c3103623a">
 
 ### Video: <img width="1115" alt="3" src="https://github.com/user-attachments/assets/032b248e-1ea8-4661-8a96-267e4a9ef01c">

@@ -31,7 +31,7 @@ The goal of the project - to create the easiest possible application to use neur
 * Flexible and optimized interface (By Gradio)
 * Debug logging to logs from `Install` and `Update` files
 * Support for Transformers and llama.cpp models (LLM)
-* Support for diffusers and safetensors models (StableDiffusion) - txt2img, img2img, depth2img, marigold, pix2pix, controlnet, upscale (latent), upscale (SUPIR), refiner, inpaint, outpaint, gligen, diffedit, animatediff, hotshot-xl, video, ldm3d, sd3, cascade, t2i-ip-adapter, ip-adapter-faceid and riffusion tabs
+* Support for diffusers and safetensors models (StableDiffusion) - txt2img, img2img, depth2img, marigold, pix2pix, controlnet, upscale (latent), upscale (SUPIR), refiner, inpaint, outpaint, gligen, diffedit, blip-diffusion, animatediff, hotshot-xl, video, ldm3d, sd3, cascade, t2i-ip-adapter, ip-adapter-faceid and riffusion tabs
 * Support for stable-diffusion-cpp models for FLUX
 * Support of additional models for image generation: Kandinsky (txt2img, img2img, inpaint), Flux (with LoRA support), HunyuanDiT (txt2img, controlnet), Lumina-T2X, Kolors (txt2img with LoRA support, img2img, ip-adapter-plus), AuraFlow (with LoRA and AuraSR support), Würstchen, DeepFloydIF (txt2img, img2img, inpaint), PixArt and PlaygroundV2.5
 * Support Extras with Rembg, CodeFormer, PixelOE, DDColor, DownScale, Format changer, FaceSwap (Roop) and Upscale (Real-ESRGAN) models for image, video and audio
@@ -40,7 +40,7 @@ The goal of the project - to create the easiest possible application to use neur
 * Support AudioLDM 2 (Models: audio and music)
 * Supports TTS and Whisper models (For LLM and TTS-STT)
 * Support MMS for text-to-speech and speech-to-text
-* Supports Lora, Textual inversion (embedding), Vae, MagicPrompt, Img2img, Depth, Marigold, Pix2Pix, Controlnet, Upscalers (latent and SUPIR), Refiner, Inpaint, Outpaint, GLIGEN, DiffEdit, AnimateDiff, HotShot-XL, Videos, LDM3D, SD3, Cascade, T2I-IP-ADAPTER, IP-Adapter-FaceID and Riffusion models (For StableDiffusion)
+* Supports Lora, Textual inversion (embedding), Vae, MagicPrompt, Img2img, Depth, Marigold, Pix2Pix, Controlnet, Upscalers (latent and SUPIR), Refiner, Inpaint, Outpaint, GLIGEN, DiffEdit, BLIP-Diffusion, AnimateDiff, HotShot-XL, Videos, LDM3D, SD3, Cascade, T2I-IP-ADAPTER, IP-Adapter-FaceID and Riffusion models (For StableDiffusion)
 * Support Multiband Diffusion model (For AudioCraft)
 * Support LibreTranslate (Local API) and SeamlessM4Tv2 for language translations
 * Support ModelScope, ZeroScope 2, CogVideoX and Latte for video generation
@@ -213,6 +213,7 @@ First of all, I want to thank the developers of [PyCharm](https://www.jetbrains.
 * [SUPIR](https://github.com/Fanghua-Yu/SUPIR/blob/master/LICENSE)
 * [MagicPrompt](https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/mit.md)
 * [Marigold](https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/apache-2.0.md)
+* [BLIP-Diffusion](https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/apache-2.0.md)
 
 #### These third-party repository codes are also used in my project:

Wikies/WikiEN.md

Lines changed: 15 additions & 5 deletions
@@ -50,7 +50,7 @@
 
 # Image:
 
-### StableDiffusion - has twenty three sub-tabs:
+### StableDiffusion - has twenty four sub-tabs:
 
 #### txt2img:

@@ -162,9 +162,19 @@
 
 #### diffedit:
 
-1) Enter your Source Prompt and Source Negative Prompt for image masking (+ and - for prompt weighting)
-2) Enter your Target Prompt and Target Negative Prompt for image diffediting (+ and - for prompt weighting)
-3) Click the `Submit` button to get the generated image
+1) Enter your Source Prompt and Source Negative Prompt for image masking
+2) Enter your Target Prompt and Target Negative Prompt for image diff-editing
+3) Upload the initial image
+4) Set up the model according to the parameters you need
+5) Click the `Submit` button to get the generated image
+
+#### blip-diffusion:
+
+1) Enter your Prompt
+2) Upload the initial image
+3) Enter your Conditioning and Target Subjects
+4) Set up the model according to the parameters you need
+5) Click the `Submit` button to get the generated image
 
 #### animatediff:

@@ -478,7 +488,7 @@
 * LLM models can be taken from [HuggingFace](https://huggingface.co/models) or from ModelDownloader inside interface
 * StableDiffusion, vae, inpaint, embedding and lora models can be taken from [CivitAI](https://civitai.com/models) or from ModelDownloader inside interface
 * RVC models can be taken from [VoiceModels](https://voice-models.com)
-* StableAudio, AudioCraft, AudioLDM 2, TTS, Whisper, MMS, SeamlessM4Tv2, Wav2Lip, LivePortrait, SunoBark, MoonDream2, Upscalers (Latent and Real-ESRGAN), Refiner, GLIGEN, DiffEdit, Depth, Marigold, Pix2Pix, Controlnet, AnimateDiff, HotShot-XL, Videos, LDM3D, SD3, Cascade, T2I-IP-ADAPTER, IP-Adapter-FaceID, Riffusion, Rembg, Roop, CodeFormer, DDColor, PixelOE, Real-ESRGAN, StableFast3D, Shap-E, SV34D, Zero123Plus, UVR, Demucs, Kandinsky, Flux, HunyuanDiT, Lumina-T2X, Kolors, AuraFlow, AuraSR, Würstchen, DeepFloydIF, PixArt, PlaygroundV2.5, ModelScope, ZeroScope 2, CogVideoX, MagicPrompt, Latte and Multiband diffusion models are downloads automatically in *inputs* folder when are they used
+* StableAudio, AudioCraft, AudioLDM 2, TTS, Whisper, MMS, SeamlessM4Tv2, Wav2Lip, LivePortrait, SunoBark, MoonDream2, Upscalers (Latent and Real-ESRGAN), Refiner, GLIGEN, DiffEdit, BLIP-Diffusion, Depth, Marigold, Pix2Pix, Controlnet, AnimateDiff, HotShot-XL, Videos, LDM3D, SD3, Cascade, T2I-IP-ADAPTER, IP-Adapter-FaceID, Riffusion, Rembg, Roop, CodeFormer, DDColor, PixelOE, Real-ESRGAN, StableFast3D, Shap-E, SV34D, Zero123Plus, UVR, Demucs, Kandinsky, Flux, HunyuanDiT, Lumina-T2X, Kolors, AuraFlow, AuraSR, Würstchen, DeepFloydIF, PixArt, PlaygroundV2.5, ModelScope, ZeroScope 2, CogVideoX, MagicPrompt, Latte and Multiband diffusion models are downloads automatically in *inputs* folder when are they used
 * You can take voices anywhere. Record yours or take a recording from the Internet. Or just use those that are already in the project. The main thing is that it is pre-processed!
 
 ## Known Bugs:
