|
12 | 12 | cache_dir = os.path.join("cache")
|
13 | 13 | os.makedirs(cache_dir, exist_ok=True)
|
14 | 14 | os.environ["XDG_CACHE_HOME"] = cache_dir
|
| 15 | +temp_dir = os.path.join("temp") |
| 16 | +os.makedirs(temp_dir, exist_ok=True) |
| 17 | +os.environ["TMPDIR"] = temp_dir |
15 | 18 | import gradio as gr
|
16 | 19 | import langdetect
|
17 | 20 | from datasets import load_dataset, Audio
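Setting `TMPDIR` (like `XDG_CACHE_HOME` just above it) before the heavy imports is what makes the redirect stick: Python's `tempfile` module resolves the temp directory on first use and caches it. A minimal standalone sketch of the behavior being relied on, not part of this diff:

```python
import os

# Redirect temp files before anything imports or uses tempfile:
os.makedirs("temp", exist_ok=True)
os.environ["TMPDIR"] = "temp"

import tempfile

# gettempdir() consults TMPDIR on its first call and caches the result,
# so setting the variable after this point would have no effect.
print(tempfile.gettempdir())  # -> absolute path of ./temp
```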
|
@@ -102,6 +105,7 @@ def wrapper():
|
102 | 105 |
|
103 | 106 | # Diffusers import
|
104 | 107 | diffusers = lazy_import('diffusers', '')
|
| 108 | +BlipDiffusionPipeline = lazy_import('diffusers.pipelines', 'BlipDiffusionPipeline') |
105 | 109 | StableDiffusionPipeline = lazy_import('diffusers', 'StableDiffusionPipeline')
|
106 | 110 | StableDiffusion3Pipeline = lazy_import('diffusers', 'StableDiffusion3Pipeline')
|
107 | 111 | StableDiffusionXLPipeline = lazy_import('diffusers', 'StableDiffusionXLPipeline')
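`lazy_import` itself is defined earlier in the file and not shown in this diff; judging by call sites such as `StableDiffusionPipeline().from_pretrained(...)`, it appears to return a zero-argument resolver rather than the attribute itself. A hypothetical sketch of that shape, purely for reading the code below (the real helper may differ):

```python
import importlib

def lazy_import(module_name, attr):
    """Assumed shape: defer the import until the returned resolver is called."""
    cache = {}

    def resolve():
        if "obj" not in cache:
            module = importlib.import_module(module_name)
            # An empty attr means "give me the module itself"
            # (see the diffusers line above).
            cache["obj"] = getattr(module, attr) if attr else module
        return cache["obj"]

    return resolve
```

Under that reading, `BlipDiffusionPipeline()` yields the pipeline class, so the call in the new function below should be `BlipDiffusionPipeline().from_pretrained(...)` with a single resolution step.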
|
@@ -3380,6 +3384,54 @@ def generate_image_diffedit(source_prompt, source_negative_prompt, target_prompt
|
3380 | 3384 | flush()
|
3381 | 3385 |
|
3382 | 3386 |
|
| 3387 | +def generate_image_blip_diffusion(text_prompt_input, negative_prompt, cond_image, cond_subject, tgt_subject, |
| 3388 | + num_inference_steps, guidance_scale, height, width, output_format): |
| 3389 | + blip_diffusion_path = os.path.join("inputs", "image", "sd_models", "blip-diff") |
| 3390 | + |
| 3391 | + if not os.path.exists(blip_diffusion_path): |
| 3392 | + print("Downloading BlipDiffusion model...") |
| 3393 | + os.makedirs(blip_diffusion_path, exist_ok=True) |
| 3394 | + Repo.clone_from("https://huggingface.co/Salesforce/blipdiffusion", blip_diffusion_path) |
| 3395 | + print("BlipDiffusion model downloaded") |
| 3396 | + |
| 3397 | + try: |
| 3398 | + device = "cuda" if torch.cuda.is_available() else "cpu" |
| 3399 | +        blip_diffusion_pipe = BlipDiffusionPipeline().from_pretrained( |
| 3400 | + blip_diffusion_path, torch_dtype=torch.float16 |
| 3401 | + ).to(device) |
| 3402 | + |
| 3403 | + cond_image = Image.open(cond_image).convert("RGB") |
| 3404 | + |
| 3405 | + output = blip_diffusion_pipe( |
| 3406 | + text_prompt_input, |
| 3407 | + cond_image, |
| 3408 | + cond_subject, |
| 3409 | + tgt_subject, |
| 3410 | + guidance_scale=guidance_scale, |
| 3411 | + num_inference_steps=num_inference_steps, |
| 3412 | +            neg_prompt=negative_prompt,  # BlipDiffusionPipeline's kwarg is neg_prompt, not negative_prompt |
| 3413 | + height=height, |
| 3414 | + width=width, |
| 3415 | + ).images |
| 3416 | + |
| 3417 | + today = datetime.now().date() |
| 3418 | + image_dir = os.path.join('outputs', f"BlipDiffusion_{today.strftime('%Y%m%d')}") |
| 3419 | + os.makedirs(image_dir, exist_ok=True) |
| 3420 | + image_filename = f"blip_diffusion_{datetime.now().strftime('%Y%m%d_%H%M%S')}.{output_format}" |
| 3421 | + image_path = os.path.join(image_dir, image_filename) |
| 3422 | + |
| 3423 | + output[0].save(image_path, format=output_format.upper()) |
| 3424 | + |
| 3425 | + return image_path, "Image generated successfully." |
| 3426 | + |
| 3427 | + except Exception as e: |
| 3428 | + return None, str(e) |
| 3429 | + |
| 3430 | + finally: |
| 3431 | +        if 'blip_diffusion_pipe' in locals(): del blip_diffusion_pipe  # may never have been bound if loading failed |
| 3432 | + flush() |
| 3433 | + |
| 3434 | + |
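A hypothetical standalone invocation of the new function, useful for smoke-testing outside Gradio (the image path and subjects are placeholders):

```python
# Assumes a local RGB reference image exists; all values are illustrative.
image_path, message = generate_image_blip_diffusion(
    text_prompt_input="swimming underwater",
    negative_prompt="",
    cond_image="inputs/dog.jpg",   # placeholder path to a reference image
    cond_subject="dog",            # what the reference image shows
    tgt_subject="dog",             # what the output should depict
    num_inference_steps=30,
    guidance_scale=8.0,
    height=512,
    width=512,
    output_format="png",
)
print(message, image_path)
```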
3383 | 3435 | def generate_image_animatediff(prompt, negative_prompt, input_video, strength, model_type, stable_diffusion_model_name, seed, motion_lora_name, num_frames, num_inference_steps,
|
3384 | 3436 | guidance_scale, width, height, clip_skip):
|
3385 | 3437 |
|
@@ -8550,6 +8602,32 @@ def reload_interface():
|
8550 | 8602 | submit_btn="Generate"
|
8551 | 8603 | )
|
8552 | 8604 |
|
| 8605 | +blip_diffusion_interface = gr.Interface( |
| 8606 | + fn=generate_image_blip_diffusion, |
| 8607 | + inputs=[ |
| 8608 | + gr.Textbox(label="Prompt"), |
| 8609 | + gr.Textbox(label="Negative Prompt", value=""), |
| 8610 | + gr.Image(label="Conditioning Image", type="filepath"), |
| 8611 | + gr.Textbox(label="Conditioning Subject"), |
| 8612 | + gr.Textbox(label="Target Subject"), |
| 8613 | + gr.Slider(minimum=1, maximum=100, value=30, step=1, label="Inference Steps"), |
| 8614 | + gr.Slider(minimum=0.1, maximum=30.0, value=8, step=0.1, label="Guidance Scale"), |
| 8615 | + gr.Slider(minimum=64, maximum=2048, value=512, step=64, label="Height"), |
| 8616 | + gr.Slider(minimum=64, maximum=2048, value=512, step=64, label="Width"), |
| 8617 | + gr.Radio(choices=["png", "jpeg"], label="Output Format", value="png") |
| 8618 | + ], |
| 8619 | + outputs=[ |
| 8620 | + gr.Image(type="filepath", label="Generated Image"), |
| 8621 | + gr.Textbox(label="Message") |
| 8622 | + ], |
| 8623 | + title="NeuroSandboxWebUI - BlipDiffusion", |
| 8624 | + description="This interface allows you to generate images using BlipDiffusion. Upload a conditioning image, provide text prompts and subjects, and customize generation parameters.", |
| 8625 | + allow_flagging="never", |
| 8626 | + clear_btn=None, |
| 8627 | + stop_btn="Stop", |
| 8628 | + submit_btn="Generate" |
| 8629 | +) |
| 8630 | + |
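For quick manual testing, the new interface can also be launched on its own before it is wired into the tab tree below; an illustrative sketch, not the app's normal entry point:

```python
# Illustrative only: the real app launches the full TabbedInterface instead.
if __name__ == "__main__":
    blip_diffusion_interface.launch()
```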
8553 | 8631 | animatediff_interface = gr.Interface(
|
8554 | 8632 | fn=generate_image_animatediff,
|
8555 | 8633 | inputs=[
|
@@ -10301,11 +10379,11 @@ def reload_interface():
|
10301 | 10379 | gr.TabbedInterface(
|
10302 | 10380 | [
|
10303 | 10381 | gr.TabbedInterface(
|
10304 |
| - [txt2img_interface, img2img_interface, depth2img_interface, marigold_interface, pix2pix_interface, controlnet_interface, latent_upscale_interface, supir_upscale_interface, sdxl_refiner_interface, inpaint_interface, outpaint_interface, gligen_interface, diffedit_interface, animatediff_interface, hotshotxl_interface, video_interface, ldm3d_interface, |
| 10382 | + [txt2img_interface, img2img_interface, depth2img_interface, marigold_interface, pix2pix_interface, controlnet_interface, latent_upscale_interface, supir_upscale_interface, sdxl_refiner_interface, inpaint_interface, outpaint_interface, gligen_interface, diffedit_interface, blip_diffusion_interface, animatediff_interface, hotshotxl_interface, video_interface, ldm3d_interface, |
10305 | 10383 | gr.TabbedInterface([sd3_txt2img_interface, sd3_img2img_interface, sd3_controlnet_interface, sd3_inpaint_interface],
|
10306 | 10384 | tab_names=["txt2img", "img2img", "controlnet", "inpaint"]),
|
10307 | 10385 | cascade_interface, t2i_ip_adapter_interface, ip_adapter_faceid_interface, riffusion_interface],
|
10308 |
| - tab_names=["txt2img", "img2img", "depth2img", "marigold", "pix2pix", "controlnet", "upscale(latent)", "upscale(SUPIR)", "refiner", "inpaint", "outpaint", "gligen", "diffedit", "animatediff", "hotshotxl", "video", "ldm3d", "sd3", "cascade", "t2i-ip-adapter", "ip-adapter-faceid", "riffusion"] |
| 10386 | + tab_names=["txt2img", "img2img", "depth2img", "marigold", "pix2pix", "controlnet", "upscale(latent)", "upscale(SUPIR)", "refiner", "inpaint", "outpaint", "gligen", "diffedit", "blip-diffusion", "animatediff", "hotshotxl", "video", "ldm3d", "sd3", "cascade", "t2i-ip-adapter", "ip-adapter-faceid", "riffusion"] |
10309 | 10387 | ),
|
10310 | 10388 | kandinsky_interface, flux_interface, hunyuandit_interface, lumina_interface, kolors_interface, auraflow_interface, wurstchen_interface, deepfloyd_if_interface, pixart_interface, playgroundv2_interface
|
10311 | 10389 | ],
|
|