Any idea how to render a long video? How to modify the py code to work chunk by chunk? #144

chaowenguo opened this issue Jan 3, 2025 · 0 comments

chaowenguo commented Jan 3, 2025

Any idea how to render a long video? If I use text2video-zero, I can render a long video chunk by chunk:

    # controlnet and index are defined earlier in my script.
    pipeline = diffusers.StableDiffusionControlNetPipeline.from_single_file('https://huggingface.co/chaowenguo/pal/blob/main/chilloutMix-Ni.safetensors', config='chaowenguo/stable-diffusion-v1-5', safety_checker=None, controlnet=controlnet, use_safetensors=True, torch_dtype=torch.bfloat16).to(torch_xla.core.xla_model.xla_device())
    pipeline.scheduler = diffusers.DDIMScheduler.from_config(pipeline.scheduler.config)
    pipeline.unet.set_attn_processor(diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.CrossFrameAttnProcessor2_0())
    pipeline.controlnet.set_attn_processor(diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.CrossFrameAttnProcessor2_0())
    openpose = easy_dwpose.DWposeDetector()
    generator = torch.Generator()
    prompt = 'A gorgeous smiling japanese girl, beautiful face, hands with five fingers, light background, best quality, extremely detailed, HD, ultra-realistic, 8K, HQ, masterpiece, trending on artstation, art, smooth'
    negative_prompt = 'monochrome, dark background, longbody, lowres, bad anatomy, bad hands, fused fingers, missing fingers, too many fingers, extra digit, fewer digits, cropped, worst quality, low quality, deformed body, bloated, ugly, unrealistic, extra hands and arms'
    with imageio.get_reader(f'pose{index}.mp4') as reader, imageio.get_writer(f'sd{index}.mp4', fps=reader.get_meta_data().get('fps')) as writer:
        pose0 = openpose(PIL.Image.fromarray(reader.get_data(0)).resize((512, 960)))
        chunk_ids = numpy.arange(0, reader.count_frames(), 3)
        for chunk, ch_start in enumerate(chunk_ids):
            ch_end = reader.count_frames() if chunk == len(chunk_ids) - 1 else chunk_ids[chunk + 1]
            # Prepend pose0 so every chunk is anchored to the same key frame.
            poses = [pose0, *(openpose(PIL.Image.fromarray(reader.get_data(frame)).resize((512, 960))) for frame in range(ch_start, ch_end))]
            images = pipeline(prompt=[prompt] * len(poses), negative_prompt=[negative_prompt] * len(poses), image=poses, generator=generator.manual_seed(index), num_inference_steps=20).images
            # Drop the duplicated anchor frame before writing.
            for image in images[1:]:
                writer.append_data(numpy.asarray(image))
    del pipeline
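
(As far as I understand, this works because CrossFrameAttnProcessor2_0 makes every frame in a batch attend to the batch's first frame, so the prepended pose0 ties all the chunks to one shared key frame.)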

Now I use the community version of rerender_a_video. Is there a similar chunk-by-chunk example, so that I can render a long video?

    import sys
    gmflow_dir = 'gmflow'
    sys.path.insert(0, gmflow_dir)

    from diffusers.utils import export_to_video
    import torch, torch_xla, PIL.Image, diffusers, imageio, easy_dwpose, rerender_a_video

    openpose = easy_dwpose.DWposeDetector()
    with imageio.get_reader('pose0.mp4') as reader:
        fps = reader.get_meta_data().get('fps')
        frames = [reader.get_data(index) for index in range(reader.count_frames())]

    controlnet = diffusers.ControlNetModel.from_pretrained('chaowenguo/control_v11p_sd15_openpose', torch_dtype=torch.bfloat16, variant='fp16', use_safetensors=True)
    pipeline = rerender_a_video.RerenderAVideoPipeline.from_single_file('https://huggingface.co/chaowenguo/pal/blob/main/chilloutMix-Ni.safetensors', config='chaowenguo/stable-diffusion-v1-5', safety_checker=None, controlnet=controlnet, use_safetensors=True, torch_dtype=torch.bfloat16).to(torch_xla.core.xla_model.xla_device())
    pipeline.vae = diffusers.AutoencoderKL.from_single_file('https://huggingface.co/chaowenguo/pal/blob/main/vae-ft-mse-840000-ema-pruned.ckpt', torch_dtype=torch.bfloat16).to(torch_xla.core.xla_model.xla_device())
    pipeline.scheduler = diffusers.DDIMScheduler.from_config(pipeline.scheduler.config)

    frames = [PIL.Image.fromarray(frame) for frame in frames]
    control_frames = [openpose(frame) for frame in frames]

    prompt = 'A gorgeous smiling japanese girl, beautiful face, hands with five fingers, light background, best quality, extremely detailed, HD, ultra-realistic, 8K, HQ, masterpiece, trending on artstation, art, smooth'
    negative_prompt = 'monochrome, dark background, longbody, lowres, bad anatomy, bad hands, fused fingers, missing fingers, too many fingers, extra digit, fewer digits, cropped, worst quality, low quality, deformed body, bloated, ugly, unrealistic, extra hands and arms'
    output_frames = pipeline(prompt=prompt, frames=frames, control_frames=control_frames, num_inference_steps=20,
        strength=0.75,
        controlnet_conditioning_scale=0.7,
        generator=torch.manual_seed(0),
        warp_start=0.0,
        warp_end=0.1,
        mask_start=0.5,
        mask_end=0.8,
        mask_strength=0.5,
        negative_prompt=negative_prompt).frames[0]

    export_to_video(output_frames, 'video.mp4', fps)

How do I modify the above code into a chunk-by-chunk version? See rerender_a_video at https://github.com/huggingface/diffusers/blob/main/examples/community/rerender_a_video.py; I have added some torch_xla TPU support to it.
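
Here is the structure I have in mind, as a minimal sketch. It reuses pipeline, frames, control_frames, prompt, negative_prompt and fps from the script above; chunk_size is a number I made up, and I do not know whether a one-frame overlap between chunks is enough to keep the flow-based warping in rerender_a_video consistent across chunk boundaries:

    chunk_size = 8  # made-up chunk length, just whatever fits in memory

    output_frames = []
    for start in range(0, len(frames), chunk_size):
        end = min(start + chunk_size, len(frames))
        # Overlap each chunk by one frame so each call starts from the frame
        # the previous chunk ended on, similar to the pose0 anchor trick above.
        anchor = max(start - 1, 0)
        result = pipeline(prompt=prompt,
            frames=frames[anchor:end],
            control_frames=control_frames[anchor:end],
            num_inference_steps=20,
            strength=0.75,
            controlnet_conditioning_scale=0.7,
            generator=torch.manual_seed(0),
            warp_start=0.0,
            warp_end=0.1,
            mask_start=0.5,
            mask_end=0.8,
            mask_strength=0.5,
            negative_prompt=negative_prompt).frames[0]
        # Drop the duplicated overlap frame except on the very first chunk.
        output_frames.extend(result if start == 0 else result[1:])

    export_to_video(output_frames, 'chunked.mp4', fps)

Is this roughly right, or does the pipeline need to see the previously generated frames (rather than the original input frames) at the chunk boundary to stay consistent?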
